xref: /openbmc/linux/net/sched/sch_netem.c (revision 3cad70bc)
184a14ae8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
61da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
91da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
101da177e4SLinus Torvalds  */
111da177e4SLinus Torvalds 
12b7f080cfSAlexey Dobriyan #include <linux/mm.h>
131da177e4SLinus Torvalds #include <linux/module.h>
145a0e3ad6STejun Heo #include <linux/slab.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <linux/errno.h>
181da177e4SLinus Torvalds #include <linux/skbuff.h>
1978776d3fSDavid S. Miller #include <linux/vmalloc.h>
201da177e4SLinus Torvalds #include <linux/rtnetlink.h>
2190b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h>
22aec0a40aSEric Dumazet #include <linux/rbtree.h>
231da177e4SLinus Torvalds 
24d457a0e3SEric Dumazet #include <net/gso.h>
25dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
261da177e4SLinus Torvalds #include <net/pkt_sched.h>
27e4ae004bSEric Dumazet #include <net/inet_ecn.h>
281da177e4SLinus Torvalds 
29250a65f7Sstephen hemminger #define VERSION "1.3"
30eb229c4cSStephen Hemminger 
311da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
321da177e4SLinus Torvalds 	====================================
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
351da177e4SLinus Torvalds 		 Network Emulation Tool"
361da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds 	 ----------------------------------------------------------------
391da177e4SLinus Torvalds 
401da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
411da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
421da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
431da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
441da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
451da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
461da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds 	 This qdisc does not do classification; that can be handled by
491da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
501da177e4SLinus Torvalds 	 control either, since that can be handled by using token
511da177e4SLinus Torvalds 	 bucket or other rate control.
52661b7972Sstephen hemminger 
53661b7972Sstephen hemminger      Correlated Loss Generator models
54661b7972Sstephen hemminger 
55661b7972Sstephen hemminger 	Added generation of correlated loss according to a
56661b7972Sstephen hemminger 	4-state Markov model and the "Gilbert-Elliot" model.
57661b7972Sstephen hemminger 
58661b7972Sstephen hemminger 	References:
59661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
60661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
61661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
62661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
63661b7972Sstephen hemminger 
64661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
65661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
661da177e4SLinus Torvalds */
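/* Illustrative configuration examples (not part of the original source;
 * "eth0" and all numbers are placeholders).  netem is normally configured
 * from user space with iproute2's tc, whose netlink attributes are parsed
 * in netem_change() below:
 *
 *   tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *   tc qdisc change dev eth0 root netem loss 0.3% 25%
 *   tc qdisc change dev eth0 root netem duplicate 1% corrupt 0.1%
 *   tc qdisc change dev eth0 root netem reorder 25% 50% gap 5 delay 10ms
 *   tc qdisc change dev eth0 root netem rate 5mbit
 *
 * Exact option spelling depends on the iproute2 version in use.
 */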
671da177e4SLinus Torvalds 
680a9fe5c3SYousuk Seung struct disttable {
690a9fe5c3SYousuk Seung 	u32  size;
70b90feaffSGustavo A. R. Silva 	s16 table[];
710a9fe5c3SYousuk Seung };
720a9fe5c3SYousuk Seung 
731da177e4SLinus Torvalds struct netem_sched_data {
74aec0a40aSEric Dumazet 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
75aec0a40aSEric Dumazet 	struct rb_root t_root;
7650612537SEric Dumazet 
77d66280b1SPeter Oskolkov 	/* a linear queue; reduces rbtree rebalancing when jitter is low */
78d66280b1SPeter Oskolkov 	struct sk_buff	*t_head;
79d66280b1SPeter Oskolkov 	struct sk_buff	*t_tail;
80d66280b1SPeter Oskolkov 
8150612537SEric Dumazet 	/* optional qdisc for classful handling (NULL at netem init) */
821da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
8350612537SEric Dumazet 
8459cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
851da177e4SLinus Torvalds 
86112f9cb6SDave Taht 	s64 latency;
87112f9cb6SDave Taht 	s64 jitter;
88b407621cSStephen Hemminger 
891da177e4SLinus Torvalds 	u32 loss;
90e4ae004bSEric Dumazet 	u32 ecn;
911da177e4SLinus Torvalds 	u32 limit;
921da177e4SLinus Torvalds 	u32 counter;
931da177e4SLinus Torvalds 	u32 gap;
941da177e4SLinus Torvalds 	u32 duplicate;
950dca51d3SStephen Hemminger 	u32 reorder;
96c865e5d9SStephen Hemminger 	u32 corrupt;
976a031f67SYang Yingliang 	u64 rate;
9890b41a1cSHagen Paul Pfeifer 	s32 packet_overhead;
9990b41a1cSHagen Paul Pfeifer 	u32 cell_size;
100809fa972SHannes Frederic Sowa 	struct reciprocal_value cell_size_reciprocal;
10190b41a1cSHagen Paul Pfeifer 	s32 cell_overhead;
1021da177e4SLinus Torvalds 
1031da177e4SLinus Torvalds 	struct crndstate {
104b407621cSStephen Hemminger 		u32 last;
105b407621cSStephen Hemminger 		u32 rho;
106c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
1071da177e4SLinus Torvalds 
1084072d97dSFrançois Michel 	struct prng  {
1094072d97dSFrançois Michel 		u64 seed;
1104072d97dSFrançois Michel 		struct rnd_state prng_state;
1114072d97dSFrançois Michel 	} prng;
1124072d97dSFrançois Michel 
1130a9fe5c3SYousuk Seung 	struct disttable *delay_dist;
114661b7972Sstephen hemminger 
115661b7972Sstephen hemminger 	enum  {
116661b7972Sstephen hemminger 		CLG_RANDOM,
117661b7972Sstephen hemminger 		CLG_4_STATES,
118661b7972Sstephen hemminger 		CLG_GILB_ELL,
119661b7972Sstephen hemminger 	} loss_model;
120661b7972Sstephen hemminger 
121a6e2fe17SYang Yingliang 	enum {
122a6e2fe17SYang Yingliang 		TX_IN_GAP_PERIOD = 1,
123a6e2fe17SYang Yingliang 		TX_IN_BURST_PERIOD,
124a6e2fe17SYang Yingliang 		LOST_IN_GAP_PERIOD,
125a6e2fe17SYang Yingliang 		LOST_IN_BURST_PERIOD,
126a6e2fe17SYang Yingliang 	} _4_state_model;
127a6e2fe17SYang Yingliang 
128c045a734SYang Yingliang 	enum {
129c045a734SYang Yingliang 		GOOD_STATE = 1,
130c045a734SYang Yingliang 		BAD_STATE,
131c045a734SYang Yingliang 	} GE_state_model;
132c045a734SYang Yingliang 
133661b7972Sstephen hemminger 	/* Correlated Loss Generation models */
134661b7972Sstephen hemminger 	struct clgstate {
135661b7972Sstephen hemminger 		/* state of the Markov chain */
136661b7972Sstephen hemminger 		u8 state;
137661b7972Sstephen hemminger 
138661b7972Sstephen hemminger 		/* 4-states and Gilbert-Elliot models */
139661b7972Sstephen hemminger 		u32 a1;	/* p13 for 4-states or p for GE */
140661b7972Sstephen hemminger 		u32 a2;	/* p31 for 4-states or r for GE */
141661b7972Sstephen hemminger 		u32 a3;	/* p32 for 4-states or h for GE */
142661b7972Sstephen hemminger 		u32 a4;	/* p14 for 4-states or 1-k for GE */
143661b7972Sstephen hemminger 		u32 a5; /* p23 used only in 4-states */
144661b7972Sstephen hemminger 	} clg;
145661b7972Sstephen hemminger 
146836af83bSDave Taht 	struct tc_netem_slot slot_config;
147836af83bSDave Taht 	struct slotstate {
148836af83bSDave Taht 		u64 slot_next;
149836af83bSDave Taht 		s32 packets_left;
150836af83bSDave Taht 		s32 bytes_left;
151836af83bSDave Taht 	} slot;
152836af83bSDave Taht 
1530a9fe5c3SYousuk Seung 	struct disttable *slot_dist;
1541da177e4SLinus Torvalds };
1551da177e4SLinus Torvalds 
15650612537SEric Dumazet /* Time stamp put into socket buffer control block
15750612537SEric Dumazet  * Only valid when skbs are in our internal t(ime)fifo queue.
15856b17425SEric Dumazet  *
15956b17425SEric Dumazet  * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
16056b17425SEric Dumazet  * and skb->next & skb->prev are scratch space for a qdisc,
16156b17425SEric Dumazet  * we save skb->tstamp value in skb->cb[] before destroying it.
16250612537SEric Dumazet  */
1631da177e4SLinus Torvalds struct netem_skb_cb {
164112f9cb6SDave Taht 	u64	        time_to_send;
1651da177e4SLinus Torvalds };
1661da177e4SLinus Torvalds 
1675f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
1685f86173bSJussi Kivilinna {
169aec0a40aSEric Dumazet 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
17016bda13dSDavid S. Miller 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
171175f9c1bSJussi Kivilinna 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
1725f86173bSJussi Kivilinna }
1735f86173bSJussi Kivilinna 
1741da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1751da177e4SLinus Torvalds  * Use entropy source for initial seed.
1761da177e4SLinus Torvalds  */
1771da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1781da177e4SLinus Torvalds {
1791da177e4SLinus Torvalds 	state->rho = rho;
180a251c17aSJason A. Donenfeld 	state->last = get_random_u32();
1811da177e4SLinus Torvalds }
1821da177e4SLinus Torvalds 
1831da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1841da177e4SLinus Torvalds  * Next number depends on last value.
1851da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1861da177e4SLinus Torvalds  */
187*3cad70bcSFrançois Michel static u32 get_crandom(struct crndstate *state, struct prng *p)
1881da177e4SLinus Torvalds {
1891da177e4SLinus Torvalds 	u64 value, rho;
1901da177e4SLinus Torvalds 	unsigned long answer;
191*3cad70bcSFrançois Michel 	struct rnd_state *s = &p->prng_state;
1921da177e4SLinus Torvalds 
1930a9fe5c3SYousuk Seung 	if (!state || state->rho == 0)	/* no correlation */
194*3cad70bcSFrançois Michel 		return prandom_u32_state(s);
1951da177e4SLinus Torvalds 
196*3cad70bcSFrançois Michel 	value = prandom_u32_state(s);
1971da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1981da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1991da177e4SLinus Torvalds 	state->last = answer;
2001da177e4SLinus Torvalds 	return answer;
2011da177e4SLinus Torvalds }
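/* Worked example (illustrative): the blend above is a fixed-point weighted
 * average, answer = (value * (2^32 - rho) + last * rho) >> 32.  With
 * rho == 0 the output is plain pseudo-random; with a 50% correlation
 * (tc scales percentages to the full u32 range, so rho is roughly
 * 0x80000000) each output is about the average of a fresh random value
 * and the previous output, so successive values cluster together.
 */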
2021da177e4SLinus Torvalds 
203661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
204661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
205661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
206661b7972Sstephen hemminger  */
207661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
208661b7972Sstephen hemminger {
209661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
2109c87b2aeSFrançois Michel 	u32 rnd = prandom_u32_state(&q->prng.prng_state);
211661b7972Sstephen hemminger 
212661b7972Sstephen hemminger 	/*
21325985edcSLucas De Marchi 	 * Compares rnd with the transition probabilities out of the
214661b7972Sstephen hemminger 	 * current state, then decides the next state and whether the
215661b7972Sstephen hemminger 	 * next packet is transmitted or lost.
216661b7972Sstephen hemminger 	 * The four states correspond to:
217a6e2fe17SYang Yingliang 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
218cb3ef7b0SHarshit Mogalapalli 	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
219cb3ef7b0SHarshit Mogalapalli 	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
220cb3ef7b0SHarshit Mogalapalli 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
221661b7972Sstephen hemminger 	 */
222661b7972Sstephen hemminger 	switch (clg->state) {
223a6e2fe17SYang Yingliang 	case TX_IN_GAP_PERIOD:
224661b7972Sstephen hemminger 		if (rnd < clg->a4) {
225cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_GAP_PERIOD;
226661b7972Sstephen hemminger 			return true;
227ab6c27beSstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
228cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_BURST_PERIOD;
229661b7972Sstephen hemminger 			return true;
230a6e2fe17SYang Yingliang 		} else if (clg->a1 + clg->a4 < rnd) {
231a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
232a6e2fe17SYang Yingliang 		}
233661b7972Sstephen hemminger 
234661b7972Sstephen hemminger 		break;
235a6e2fe17SYang Yingliang 	case TX_IN_BURST_PERIOD:
236661b7972Sstephen hemminger 		if (rnd < clg->a5) {
237cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_BURST_PERIOD;
238661b7972Sstephen hemminger 			return true;
239a6e2fe17SYang Yingliang 		} else {
240a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
241a6e2fe17SYang Yingliang 		}
242661b7972Sstephen hemminger 
243661b7972Sstephen hemminger 		break;
244cb3ef7b0SHarshit Mogalapalli 	case LOST_IN_BURST_PERIOD:
245661b7972Sstephen hemminger 		if (rnd < clg->a3)
246a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
247661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
248a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
249661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
250cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_BURST_PERIOD;
251661b7972Sstephen hemminger 			return true;
252661b7972Sstephen hemminger 		}
253661b7972Sstephen hemminger 		break;
254cb3ef7b0SHarshit Mogalapalli 	case LOST_IN_GAP_PERIOD:
255a6e2fe17SYang Yingliang 		clg->state = TX_IN_GAP_PERIOD;
256661b7972Sstephen hemminger 		break;
257661b7972Sstephen hemminger 	}
258661b7972Sstephen hemminger 
259661b7972Sstephen hemminger 	return false;
260661b7972Sstephen hemminger }
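/* Note on parameters (assuming iproute2's "loss state" option): a1..a5
 * hold the transition probabilities p13, p31, p32, p14 and p23, scaled so
 * that ~0 means 100%, which is why they are compared directly against
 * prandom_u32_state() above.  TX_IN_GAP_PERIOD/LOST_IN_GAP_PERIOD model
 * the mostly lossless gap period, TX_IN_BURST_PERIOD/LOST_IN_BURST_PERIOD
 * the mostly lossy burst period.
 */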
261661b7972Sstephen hemminger 
262661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
263661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
264661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
265661b7972Sstephen hemminger  *
26625985edcSLucas De Marchi  * Makes a comparison between a random number and the transition
267661b7972Sstephen hemminger  * probabilities outgoing from the current state, then decides the
26825985edcSLucas De Marchi  * next state. A second random number is extracted and the comparison
269661b7972Sstephen hemminger  * with the loss probability of the current state decides if the next
270661b7972Sstephen hemminger  * packet will be transmitted or lost.
271661b7972Sstephen hemminger  */
272661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
273661b7972Sstephen hemminger {
274661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
2759c87b2aeSFrançois Michel 	struct rnd_state *s = &q->prng.prng_state;
276661b7972Sstephen hemminger 
277661b7972Sstephen hemminger 	switch (clg->state) {
278c045a734SYang Yingliang 	case GOOD_STATE:
2799c87b2aeSFrançois Michel 		if (prandom_u32_state(s) < clg->a1)
280c045a734SYang Yingliang 			clg->state = BAD_STATE;
2819c87b2aeSFrançois Michel 		if (prandom_u32_state(s) < clg->a4)
282661b7972Sstephen hemminger 			return true;
2837c2781faSstephen hemminger 		break;
284c045a734SYang Yingliang 	case BAD_STATE:
2859c87b2aeSFrançois Michel 		if (prandom_u32_state(s) < clg->a2)
286c045a734SYang Yingliang 			clg->state = GOOD_STATE;
2879c87b2aeSFrançois Michel 		if (prandom_u32_state(s) > clg->a3)
288661b7972Sstephen hemminger 			return true;
289661b7972Sstephen hemminger 	}
290661b7972Sstephen hemminger 
291661b7972Sstephen hemminger 	return false;
292661b7972Sstephen hemminger }
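/* Note on parameters (assuming iproute2's "loss gemodel" option): a1 is p
 * (good->bad transition), a2 is r (bad->good), a3 is h (probability of
 * transmitting while in the bad state, hence the "> clg->a3" test above)
 * and a4 is 1-k (loss probability in the good state).  Giving only p and r
 * yields the Simple Gilbert special case where the bad state drops
 * everything.
 */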
293661b7972Sstephen hemminger 
294661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
295661b7972Sstephen hemminger {
296661b7972Sstephen hemminger 	switch (q->loss_model) {
297661b7972Sstephen hemminger 	case CLG_RANDOM:
298661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
299*3cad70bcSFrançois Michel 		return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
300661b7972Sstephen hemminger 
301661b7972Sstephen hemminger 	case CLG_4_STATES:
302661b7972Sstephen hemminger 		/* 4-state loss model algorithm (also used for the GI model).
303661b7972Sstephen hemminger 		* Extracts a value from the Markov 4-state loss generator;
304661b7972Sstephen hemminger 		* a loss indication means this packet is dropped.
306661b7972Sstephen hemminger 		*/
307661b7972Sstephen hemminger 		return loss_4state(q);
308661b7972Sstephen hemminger 
309661b7972Sstephen hemminger 	case CLG_GILB_ELL:
310661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm.
311661b7972Sstephen hemminger 		* Extracts a value from the Gilbert-Elliot loss generator;
312661b7972Sstephen hemminger 		* a loss indication means this packet is dropped.
314661b7972Sstephen hemminger 		*/
315661b7972Sstephen hemminger 		return loss_gilb_ell(q);
316661b7972Sstephen hemminger 	}
317661b7972Sstephen hemminger 
318661b7972Sstephen hemminger 	return false;	/* not reached */
319661b7972Sstephen hemminger }
320661b7972Sstephen hemminger 
321661b7972Sstephen hemminger 
3221da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
3231da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
3241da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
3251da177e4SLinus Torvalds  */
3269b0ed891SStephen Hemminger static s64 tabledist(s64 mu, s32 sigma,
327b407621cSStephen Hemminger 		     struct crndstate *state,
328*3cad70bcSFrançois Michel 		     struct prng *prng,
329b407621cSStephen Hemminger 		     const struct disttable *dist)
3301da177e4SLinus Torvalds {
331112f9cb6SDave Taht 	s64 x;
332b407621cSStephen Hemminger 	long t;
333b407621cSStephen Hemminger 	u32 rnd;
3341da177e4SLinus Torvalds 
3351da177e4SLinus Torvalds 	if (sigma == 0)
3361da177e4SLinus Torvalds 		return mu;
3371da177e4SLinus Torvalds 
338*3cad70bcSFrançois Michel 	rnd = get_crandom(state, prng);
3391da177e4SLinus Torvalds 
3401da177e4SLinus Torvalds 	/* default uniform distribution */
3411da177e4SLinus Torvalds 	if (dist == NULL)
342eadd1befSAleksandr Nogikh 		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
3451da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
3461da177e4SLinus Torvalds 	if (x >= 0)
3471da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
3481da177e4SLinus Torvalds 	else
3491da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
3501da177e4SLinus Torvalds 
3511da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3521da177e4SLinus Torvalds }
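/* Worked example (illustrative numbers): with mu = 100ms and sigma = 10ms
 * (already converted to nanoseconds by the caller) and no table, the result
 * is uniform in [mu - sigma, mu + sigma).  With a table loaded via
 * TCA_NETEM_DELAY_DIST, each entry t is a sample in units of
 * NETEM_DIST_SCALE, so the result is roughly mu + sigma * t / NETEM_DIST_SCALE
 * and follows the table's shape with the requested mean and deviation.
 */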
3531da177e4SLinus Torvalds 
354bce552fdSStephen Hemminger static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
3557bc0f28cSHagen Paul Pfeifer {
35690b41a1cSHagen Paul Pfeifer 	len += q->packet_overhead;
35790b41a1cSHagen Paul Pfeifer 
35890b41a1cSHagen Paul Pfeifer 	if (q->cell_size) {
35990b41a1cSHagen Paul Pfeifer 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
36090b41a1cSHagen Paul Pfeifer 
36190b41a1cSHagen Paul Pfeifer 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
36290b41a1cSHagen Paul Pfeifer 			cells++;
36390b41a1cSHagen Paul Pfeifer 		len = cells * (q->cell_size + q->cell_overhead);
36490b41a1cSHagen Paul Pfeifer 	}
365bce552fdSStephen Hemminger 
366bce552fdSStephen Hemminger 	return div64_u64(len * NSEC_PER_SEC, q->rate);
3677bc0f28cSHagen Paul Pfeifer }
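/* Worked example (illustrative; assumes q->rate is stored in bytes per
 * second, which is how iproute2 passes it): at "rate 100mbit", i.e.
 * 12,500,000 bytes/s, a 1500-byte packet adds
 * 1500 * NSEC_PER_SEC / 12,500,000 = 120,000 ns (120 us) of serialization
 * delay.  With cell_size set (e.g. 53 for ATM-like framing) the length is
 * first rounded up to a whole number of cells plus per-cell overhead.
 */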
3687bc0f28cSHagen Paul Pfeifer 
369ff704050Sstephen hemminger static void tfifo_reset(struct Qdisc *sch)
370ff704050Sstephen hemminger {
371ff704050Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3723aa605f2SEric Dumazet 	struct rb_node *p = rb_first(&q->t_root);
373ff704050Sstephen hemminger 
3743aa605f2SEric Dumazet 	while (p) {
37518a4c0eaSEric Dumazet 		struct sk_buff *skb = rb_to_skb(p);
376ff704050Sstephen hemminger 
3773aa605f2SEric Dumazet 		p = rb_next(p);
3783aa605f2SEric Dumazet 		rb_erase(&skb->rbnode, &q->t_root);
3792f08a9a1SEric Dumazet 		rtnl_kfree_skbs(skb, skb);
380ff704050Sstephen hemminger 	}
381d66280b1SPeter Oskolkov 
382d66280b1SPeter Oskolkov 	rtnl_kfree_skbs(q->t_head, q->t_tail);
383d66280b1SPeter Oskolkov 	q->t_head = NULL;
384d66280b1SPeter Oskolkov 	q->t_tail = NULL;
385ff704050Sstephen hemminger }
386ff704050Sstephen hemminger 
387960fb66eSEric Dumazet static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
38850612537SEric Dumazet {
389aec0a40aSEric Dumazet 	struct netem_sched_data *q = qdisc_priv(sch);
390112f9cb6SDave Taht 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
391d66280b1SPeter Oskolkov 
392d66280b1SPeter Oskolkov 	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
393d66280b1SPeter Oskolkov 		if (q->t_tail)
394d66280b1SPeter Oskolkov 			q->t_tail->next = nskb;
395d66280b1SPeter Oskolkov 		else
396d66280b1SPeter Oskolkov 			q->t_head = nskb;
397d66280b1SPeter Oskolkov 		q->t_tail = nskb;
398d66280b1SPeter Oskolkov 	} else {
399aec0a40aSEric Dumazet 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
40050612537SEric Dumazet 
401aec0a40aSEric Dumazet 		while (*p) {
402aec0a40aSEric Dumazet 			struct sk_buff *skb;
40350612537SEric Dumazet 
404aec0a40aSEric Dumazet 			parent = *p;
40518a4c0eaSEric Dumazet 			skb = rb_to_skb(parent);
40650612537SEric Dumazet 			if (tnext >= netem_skb_cb(skb)->time_to_send)
407aec0a40aSEric Dumazet 				p = &parent->rb_right;
408aec0a40aSEric Dumazet 			else
409aec0a40aSEric Dumazet 				p = &parent->rb_left;
41050612537SEric Dumazet 		}
41156b17425SEric Dumazet 		rb_link_node(&nskb->rbnode, parent, p);
41256b17425SEric Dumazet 		rb_insert_color(&nskb->rbnode, &q->t_root);
413d66280b1SPeter Oskolkov 	}
414aec0a40aSEric Dumazet 	sch->q.qlen++;
41550612537SEric Dumazet }
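/* Design note: with zero or small jitter, packets arrive here with
 * non-decreasing time_to_send, so the tail check above appends them to the
 * t_head/t_tail linear list in O(1).  Only packets that must be delivered
 * out of order fall back to the rbtree, which keeps rebalancing cost off
 * the common path.
 */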
41650612537SEric Dumazet 
4176071bd1aSNeil Horman /* netem can't properly corrupt a megapacket (like we get from GSO), so
4186071bd1aSNeil Horman  * when we statistically choose to corrupt one, we instead segment it, returning
4196071bd1aSNeil Horman  * the first packet to be corrupted, and re-enqueue the remaining frames
4206071bd1aSNeil Horman  */
421520ac30fSEric Dumazet static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
422520ac30fSEric Dumazet 				     struct sk_buff **to_free)
4236071bd1aSNeil Horman {
4246071bd1aSNeil Horman 	struct sk_buff *segs;
4256071bd1aSNeil Horman 	netdev_features_t features = netif_skb_features(skb);
4266071bd1aSNeil Horman 
4276071bd1aSNeil Horman 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
4286071bd1aSNeil Horman 
4296071bd1aSNeil Horman 	if (IS_ERR_OR_NULL(segs)) {
430520ac30fSEric Dumazet 		qdisc_drop(skb, sch, to_free);
4316071bd1aSNeil Horman 		return NULL;
4326071bd1aSNeil Horman 	}
4336071bd1aSNeil Horman 	consume_skb(skb);
4346071bd1aSNeil Horman 	return segs;
4356071bd1aSNeil Horman }
4366071bd1aSNeil Horman 
4370afb51e7SStephen Hemminger /*
4380afb51e7SStephen Hemminger  * Insert one skb into qdisc.
4390afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
4400afb51e7SStephen Hemminger  * 	NET_XMIT_DROP: queue length didn't change.
4410afb51e7SStephen Hemminger  *      NET_XMIT_SUCCESS: one skb was queued.
4420afb51e7SStephen Hemminger  */
443520ac30fSEric Dumazet static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
444520ac30fSEric Dumazet 			 struct sk_buff **to_free)
4451da177e4SLinus Torvalds {
4461da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
44789e1df74SGuillaume Chazarain 	/* We don't fill cb now as skb_unshare() may invalidate it */
44889e1df74SGuillaume Chazarain 	struct netem_skb_cb *cb;
4490afb51e7SStephen Hemminger 	struct sk_buff *skb2;
4506071bd1aSNeil Horman 	struct sk_buff *segs = NULL;
451177b8007SJakub Kicinski 	unsigned int prev_len = qdisc_pkt_len(skb);
4520afb51e7SStephen Hemminger 	int count = 1;
4536071bd1aSNeil Horman 	int rc = NET_XMIT_SUCCESS;
4545845f706SSheng Lan 	int rc_drop = NET_XMIT_DROP;
4551da177e4SLinus Torvalds 
4569410d386SChristoph Paasch 	/* Do not fool qdisc_drop_all() */
4579410d386SChristoph Paasch 	skb->prev = NULL;
4589410d386SChristoph Paasch 
4590afb51e7SStephen Hemminger 	/* Random duplication */
460*3cad70bcSFrançois Michel 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
4610afb51e7SStephen Hemminger 		++count;
4620afb51e7SStephen Hemminger 
463661b7972Sstephen hemminger 	/* Drop packet? */
464e4ae004bSEric Dumazet 	if (loss_event(q)) {
465e4ae004bSEric Dumazet 		if (q->ecn && INET_ECN_set_ce(skb))
46625331d6cSJohn Fastabend 			qdisc_qstats_drop(sch); /* mark packet */
467e4ae004bSEric Dumazet 		else
4680afb51e7SStephen Hemminger 			--count;
469e4ae004bSEric Dumazet 	}
4700afb51e7SStephen Hemminger 	if (count == 0) {
47125331d6cSJohn Fastabend 		qdisc_qstats_drop(sch);
472520ac30fSEric Dumazet 		__qdisc_drop(skb, to_free);
473c27f339aSJarek Poplawski 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
4741da177e4SLinus Torvalds 	}
4751da177e4SLinus Torvalds 
4765a308f40SEric Dumazet 	/* If a delay is expected, orphan the skb. (orphaning usually takes
4775a308f40SEric Dumazet 	 * place at TX completion time, so _before_ the link transit delay)
4785a308f40SEric Dumazet 	 */
4795080f39eSNik Unger 	if (q->latency || q->jitter || q->rate)
480f2f872f9SEric Dumazet 		skb_orphan_partial(skb);
4814e8a5201SDavid S. Miller 
4820afb51e7SStephen Hemminger 	/*
4830afb51e7SStephen Hemminger 	 * If we need to duplicate packet, then re-insert at top of the
4840afb51e7SStephen Hemminger 	 * qdisc tree, since parent queuer expects that only one
4850afb51e7SStephen Hemminger 	 * skb will be queued.
486d5d75cd6SStephen Hemminger 	 */
4870afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
488159d2c7dSEric Dumazet 		struct Qdisc *rootq = qdisc_root_bh(sch);
4890afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
490d5d75cd6SStephen Hemminger 
491b396cca6SEric Dumazet 		q->duplicate = 0;
492520ac30fSEric Dumazet 		rootq->enqueue(skb2, rootq, to_free);
4930afb51e7SStephen Hemminger 		q->duplicate = dupsave;
4945845f706SSheng Lan 		rc_drop = NET_XMIT_SUCCESS;
4951da177e4SLinus Torvalds 	}
4961da177e4SLinus Torvalds 
497c865e5d9SStephen Hemminger 	/*
498c865e5d9SStephen Hemminger 	 * Randomized packet corruption.
499c865e5d9SStephen Hemminger 	 * Make a copy if needed since we are modifying it.
500c865e5d9SStephen Hemminger 	 * If packet is going to be hardware checksummed, then
501c865e5d9SStephen Hemminger 	 * do it now in software before we mangle it.
502c865e5d9SStephen Hemminger 	 */
503*3cad70bcSFrançois Michel 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
5046071bd1aSNeil Horman 		if (skb_is_gso(skb)) {
5053e14c383SJakub Kicinski 			skb = netem_segment(skb, sch, to_free);
5063e14c383SJakub Kicinski 			if (!skb)
5075845f706SSheng Lan 				return rc_drop;
5083e14c383SJakub Kicinski 			segs = skb->next;
5093e14c383SJakub Kicinski 			skb_mark_not_on_list(skb);
5103e14c383SJakub Kicinski 			qdisc_skb_cb(skb)->pkt_len = skb->len;
5116071bd1aSNeil Horman 		}
5126071bd1aSNeil Horman 
5138a6e9c67SEric Dumazet 		skb = skb_unshare(skb, GFP_ATOMIC);
5148a6e9c67SEric Dumazet 		if (unlikely(!skb)) {
5158a6e9c67SEric Dumazet 			qdisc_qstats_drop(sch);
5168a6e9c67SEric Dumazet 			goto finish_segs;
5178a6e9c67SEric Dumazet 		}
5188a6e9c67SEric Dumazet 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
5198a6e9c67SEric Dumazet 		    skb_checksum_help(skb)) {
5208a6e9c67SEric Dumazet 			qdisc_drop(skb, sch, to_free);
521a7fa12d1SJakub Kicinski 			skb = NULL;
5226071bd1aSNeil Horman 			goto finish_segs;
5236071bd1aSNeil Horman 		}
524c865e5d9SStephen Hemminger 
5258032bf12SJason A. Donenfeld 		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
5268032bf12SJason A. Donenfeld 			1<<get_random_u32_below(8);
527c865e5d9SStephen Hemminger 	}
528c865e5d9SStephen Hemminger 
5295845f706SSheng Lan 	if (unlikely(sch->q.qlen >= sch->limit)) {
5303e14c383SJakub Kicinski 		/* re-link segs, so that qdisc_drop_all() frees them all */
5313e14c383SJakub Kicinski 		skb->next = segs;
5325845f706SSheng Lan 		qdisc_drop_all(skb, sch, to_free);
5335845f706SSheng Lan 		return rc_drop;
5345845f706SSheng Lan 	}
535960fb66eSEric Dumazet 
53625331d6cSJohn Fastabend 	qdisc_qstats_backlog_inc(sch, skb);
537960fb66eSEric Dumazet 
5385f86173bSJussi Kivilinna 	cb = netem_skb_cb(skb);
539f64f9e71SJoe Perches 	if (q->gap == 0 ||		/* not doing reordering */
540a42b4799SVijay Subramanian 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
541*3cad70bcSFrançois Michel 	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
542112f9cb6SDave Taht 		u64 now;
543112f9cb6SDave Taht 		s64 delay;
54407aaa115SStephen Hemminger 
54507aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
546*3cad70bcSFrançois Michel 				  &q->delay_cor, &q->prng, q->delay_dist);
54707aaa115SStephen Hemminger 
548112f9cb6SDave Taht 		now = ktime_get_ns();
5497bc0f28cSHagen Paul Pfeifer 
5507bc0f28cSHagen Paul Pfeifer 		if (q->rate) {
5515080f39eSNik Unger 			struct netem_skb_cb *last = NULL;
5527bc0f28cSHagen Paul Pfeifer 
5535080f39eSNik Unger 			if (sch->q.tail)
5545080f39eSNik Unger 				last = netem_skb_cb(sch->q.tail);
5555080f39eSNik Unger 			if (q->t_root.rb_node) {
5565080f39eSNik Unger 				struct sk_buff *t_skb;
5575080f39eSNik Unger 				struct netem_skb_cb *t_last;
5585080f39eSNik Unger 
55918a4c0eaSEric Dumazet 				t_skb = skb_rb_last(&q->t_root);
5605080f39eSNik Unger 				t_last = netem_skb_cb(t_skb);
5615080f39eSNik Unger 				if (!last ||
562d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
5635080f39eSNik Unger 					last = t_last;
5645080f39eSNik Unger 			}
565d66280b1SPeter Oskolkov 			if (q->t_tail) {
566d66280b1SPeter Oskolkov 				struct netem_skb_cb *t_last =
567d66280b1SPeter Oskolkov 					netem_skb_cb(q->t_tail);
568d66280b1SPeter Oskolkov 
569d66280b1SPeter Oskolkov 				if (!last ||
570d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
571d66280b1SPeter Oskolkov 					last = t_last;
5725080f39eSNik Unger 			}
5735080f39eSNik Unger 
574aec0a40aSEric Dumazet 			if (last) {
5757bc0f28cSHagen Paul Pfeifer 				/*
576a13d3104SJohannes Naab 				 * Last packet in queue is reference point (now),
577a13d3104SJohannes Naab 				 * calculate this time bonus and subtract
5787bc0f28cSHagen Paul Pfeifer 				 * from delay.
5797bc0f28cSHagen Paul Pfeifer 				 */
5805080f39eSNik Unger 				delay -= last->time_to_send - now;
581112f9cb6SDave Taht 				delay = max_t(s64, 0, delay);
5825080f39eSNik Unger 				now = last->time_to_send;
5837bc0f28cSHagen Paul Pfeifer 			}
584a13d3104SJohannes Naab 
585bce552fdSStephen Hemminger 			delay += packet_time_ns(qdisc_pkt_len(skb), q);
5867bc0f28cSHagen Paul Pfeifer 		}
5877bc0f28cSHagen Paul Pfeifer 
5887c59e25fSPatrick McHardy 		cb->time_to_send = now + delay;
5891da177e4SLinus Torvalds 		++q->counter;
590960fb66eSEric Dumazet 		tfifo_enqueue(skb, sch);
5911da177e4SLinus Torvalds 	} else {
5920dca51d3SStephen Hemminger 		/*
5930dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
5940dca51d3SStephen Hemminger 		 * of the queue.
5950dca51d3SStephen Hemminger 		 */
596112f9cb6SDave Taht 		cb->time_to_send = ktime_get_ns();
5970dca51d3SStephen Hemminger 		q->counter = 0;
5988ba25dadSJarek Poplawski 
59959697730SDavid S. Miller 		__qdisc_enqueue_head(skb, &sch->q);
600eb101924SHagen Paul Pfeifer 		sch->qstats.requeues++;
601378a2f09SJarek Poplawski 	}
6021da177e4SLinus Torvalds 
6036071bd1aSNeil Horman finish_segs:
6046071bd1aSNeil Horman 	if (segs) {
605177b8007SJakub Kicinski 		unsigned int len, last_len;
606a7fa12d1SJakub Kicinski 		int nb;
607177b8007SJakub Kicinski 
608a7fa12d1SJakub Kicinski 		len = skb ? skb->len : 0;
609a7fa12d1SJakub Kicinski 		nb = skb ? 1 : 0;
610177b8007SJakub Kicinski 
6116071bd1aSNeil Horman 		while (segs) {
6126071bd1aSNeil Horman 			skb2 = segs->next;
613a8305bffSDavid S. Miller 			skb_mark_not_on_list(segs);
6146071bd1aSNeil Horman 			qdisc_skb_cb(segs)->pkt_len = segs->len;
6156071bd1aSNeil Horman 			last_len = segs->len;
616520ac30fSEric Dumazet 			rc = qdisc_enqueue(segs, sch, to_free);
6176071bd1aSNeil Horman 			if (rc != NET_XMIT_SUCCESS) {
6186071bd1aSNeil Horman 				if (net_xmit_drop_count(rc))
6196071bd1aSNeil Horman 					qdisc_qstats_drop(sch);
6206071bd1aSNeil Horman 			} else {
6216071bd1aSNeil Horman 				nb++;
6226071bd1aSNeil Horman 				len += last_len;
6236071bd1aSNeil Horman 			}
6246071bd1aSNeil Horman 			segs = skb2;
6256071bd1aSNeil Horman 		}
626a7fa12d1SJakub Kicinski 		/* Parent qdiscs accounted for 1 skb of size @prev_len */
627a7fa12d1SJakub Kicinski 		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
628e0ad032eSJakub Kicinski 	} else if (!skb) {
629e0ad032eSJakub Kicinski 		return NET_XMIT_DROP;
6306071bd1aSNeil Horman 	}
63110f6dfcfSstephen hemminger 	return NET_XMIT_SUCCESS;
6321da177e4SLinus Torvalds }
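/* Summary of the enqueue path above: decide duplication and loss first
 * (a loss may become an ECN mark instead of a drop), segment GSO packets
 * before flipping a single random bit for corruption, re-inject any
 * duplicate at the root qdisc, then either compute
 * time_to_send = now + tabledist(latency, jitter) plus rate-based
 * serialization delay referenced to the last queued packet, or, for the
 * one-in-gap reordered packets, queue at the head for immediate delivery.
 */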
6331da177e4SLinus Torvalds 
634836af83bSDave Taht /* Pick the start time of the next delivery slot and reset the
635836af83bSDave Taht  * per-slot packet and byte budgets.
636836af83bSDave Taht  */
637836af83bSDave Taht 
638836af83bSDave Taht static void get_slot_next(struct netem_sched_data *q, u64 now)
639836af83bSDave Taht {
6400a9fe5c3SYousuk Seung 	s64 next_delay;
6410a9fe5c3SYousuk Seung 
6420a9fe5c3SYousuk Seung 	if (!q->slot_dist)
6430a9fe5c3SYousuk Seung 		next_delay = q->slot_config.min_delay +
644a251c17aSJason A. Donenfeld 				(get_random_u32() *
645836af83bSDave Taht 				 (q->slot_config.max_delay -
646836af83bSDave Taht 				  q->slot_config.min_delay) >> 32);
6470a9fe5c3SYousuk Seung 	else
6480a9fe5c3SYousuk Seung 		next_delay = tabledist(q->slot_config.dist_delay,
6490a9fe5c3SYousuk Seung 				       (s32)(q->slot_config.dist_jitter),
650*3cad70bcSFrançois Michel 				       NULL, &q->prng, q->slot_dist);
6510a9fe5c3SYousuk Seung 
6520a9fe5c3SYousuk Seung 	q->slot.slot_next = now + next_delay;
653836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
654836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
655836af83bSDave Taht }
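/* Slotting note: when a slot configuration is active, packets are released
 * only while the current slot is open and each new slot gets fresh packet
 * and byte budgets (see netem_dequeue()).  The next slot start is either
 * uniform between min_delay and max_delay or drawn from a loaded
 * distribution table; this can approximate bursty, shared-medium links.
 */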
656836af83bSDave Taht 
657d66280b1SPeter Oskolkov static struct sk_buff *netem_peek(struct netem_sched_data *q)
658d66280b1SPeter Oskolkov {
659d66280b1SPeter Oskolkov 	struct sk_buff *skb = skb_rb_first(&q->t_root);
660d66280b1SPeter Oskolkov 	u64 t1, t2;
661d66280b1SPeter Oskolkov 
662d66280b1SPeter Oskolkov 	if (!skb)
663d66280b1SPeter Oskolkov 		return q->t_head;
664d66280b1SPeter Oskolkov 	if (!q->t_head)
665d66280b1SPeter Oskolkov 		return skb;
666d66280b1SPeter Oskolkov 
667d66280b1SPeter Oskolkov 	t1 = netem_skb_cb(skb)->time_to_send;
668d66280b1SPeter Oskolkov 	t2 = netem_skb_cb(q->t_head)->time_to_send;
669d66280b1SPeter Oskolkov 	if (t1 < t2)
670d66280b1SPeter Oskolkov 		return skb;
671d66280b1SPeter Oskolkov 	return q->t_head;
672d66280b1SPeter Oskolkov }
673d66280b1SPeter Oskolkov 
674d66280b1SPeter Oskolkov static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
675d66280b1SPeter Oskolkov {
676d66280b1SPeter Oskolkov 	if (skb == q->t_head) {
677d66280b1SPeter Oskolkov 		q->t_head = skb->next;
678d66280b1SPeter Oskolkov 		if (!q->t_head)
679d66280b1SPeter Oskolkov 			q->t_tail = NULL;
680d66280b1SPeter Oskolkov 	} else {
681d66280b1SPeter Oskolkov 		rb_erase(&skb->rbnode, &q->t_root);
682d66280b1SPeter Oskolkov 	}
683d66280b1SPeter Oskolkov }
684d66280b1SPeter Oskolkov 
6851da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
6861da177e4SLinus Torvalds {
6871da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6881da177e4SLinus Torvalds 	struct sk_buff *skb;
6891da177e4SLinus Torvalds 
69050612537SEric Dumazet tfifo_dequeue:
691ed760cb8SFlorian Westphal 	skb = __qdisc_dequeue_head(&sch->q);
692771018e7SStephen Hemminger 	if (skb) {
69325331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
6940ad2a836SBeshay, Joseph deliver:
695aec0a40aSEric Dumazet 		qdisc_bstats_update(sch, skb);
696aec0a40aSEric Dumazet 		return skb;
697aec0a40aSEric Dumazet 	}
698d66280b1SPeter Oskolkov 	skb = netem_peek(q);
699d66280b1SPeter Oskolkov 	if (skb) {
700112f9cb6SDave Taht 		u64 time_to_send;
701836af83bSDave Taht 		u64 now = ktime_get_ns();
70236b7bfe0SEric Dumazet 
7030f9f32acSStephen Hemminger 		/* is there more time remaining before this packet is due? */
70436b7bfe0SEric Dumazet 		time_to_send = netem_skb_cb(skb)->time_to_send;
705836af83bSDave Taht 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
706836af83bSDave Taht 			get_slot_next(q, now);
707aec0a40aSEric Dumazet 
708836af83bSDave Taht 		if (time_to_send <= now && q->slot.slot_next <= now) {
709d66280b1SPeter Oskolkov 			netem_erase_head(q, skb);
710aec0a40aSEric Dumazet 			sch->q.qlen--;
7110ad2a836SBeshay, Joseph 			qdisc_qstats_backlog_dec(sch, skb);
712aec0a40aSEric Dumazet 			skb->next = NULL;
713aec0a40aSEric Dumazet 			skb->prev = NULL;
714bffa72cfSEric Dumazet 			/* skb->dev shares skb->rbnode area,
715bffa72cfSEric Dumazet 			 * we need to restore its value.
716bffa72cfSEric Dumazet 			 */
717bffa72cfSEric Dumazet 			skb->dev = qdisc_dev(sch);
71803c05f0dSJarek Poplawski 
719836af83bSDave Taht 			if (q->slot.slot_next) {
720836af83bSDave Taht 				q->slot.packets_left--;
721836af83bSDave Taht 				q->slot.bytes_left -= qdisc_pkt_len(skb);
722836af83bSDave Taht 				if (q->slot.packets_left <= 0 ||
723836af83bSDave Taht 				    q->slot.bytes_left <= 0)
724836af83bSDave Taht 					get_slot_next(q, now);
725836af83bSDave Taht 			}
726836af83bSDave Taht 
72750612537SEric Dumazet 			if (q->qdisc) {
72821de12eeSEric Dumazet 				unsigned int pkt_len = qdisc_pkt_len(skb);
729520ac30fSEric Dumazet 				struct sk_buff *to_free = NULL;
730520ac30fSEric Dumazet 				int err;
73150612537SEric Dumazet 
732520ac30fSEric Dumazet 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
733520ac30fSEric Dumazet 				kfree_skb_list(to_free);
73421de12eeSEric Dumazet 				if (err != NET_XMIT_SUCCESS &&
73521de12eeSEric Dumazet 				    net_xmit_drop_count(err)) {
73625331d6cSJohn Fastabend 					qdisc_qstats_drop(sch);
7372ccccf5fSWANG Cong 					qdisc_tree_reduce_backlog(sch, 1,
73821de12eeSEric Dumazet 								  pkt_len);
73950612537SEric Dumazet 				}
74050612537SEric Dumazet 				goto tfifo_dequeue;
74150612537SEric Dumazet 			}
742aec0a40aSEric Dumazet 			goto deliver;
74311274e5aSStephen Hemminger 		}
74407aaa115SStephen Hemminger 
74550612537SEric Dumazet 		if (q->qdisc) {
74650612537SEric Dumazet 			skb = q->qdisc->ops->dequeue(q->qdisc);
74750612537SEric Dumazet 			if (skb)
74850612537SEric Dumazet 				goto deliver;
74950612537SEric Dumazet 		}
750836af83bSDave Taht 
751836af83bSDave Taht 		qdisc_watchdog_schedule_ns(&q->watchdog,
752836af83bSDave Taht 					   max(time_to_send,
753836af83bSDave Taht 					       q->slot.slot_next));
7540f9f32acSStephen Hemminger 	}
7550f9f32acSStephen Hemminger 
75650612537SEric Dumazet 	if (q->qdisc) {
75750612537SEric Dumazet 		skb = q->qdisc->ops->dequeue(q->qdisc);
75850612537SEric Dumazet 		if (skb)
75950612537SEric Dumazet 			goto deliver;
76050612537SEric Dumazet 	}
7610f9f32acSStephen Hemminger 	return NULL;
7621da177e4SLinus Torvalds }
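/* Dequeue order above: packets already moved to sch->q (reordered to the
 * head at enqueue time) leave first; otherwise the earliest tfifo packet is
 * released once both its time_to_send and the current slot allow it,
 * passing through the optional child qdisc when one is attached.  If
 * nothing is due yet, the qdisc watchdog hrtimer is armed for
 * max(time_to_send, slot_next).
 */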
7631da177e4SLinus Torvalds 
7641da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
7651da177e4SLinus Torvalds {
7661da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7671da177e4SLinus Torvalds 
76850612537SEric Dumazet 	qdisc_reset_queue(sch);
769ff704050Sstephen hemminger 	tfifo_reset(sch);
77050612537SEric Dumazet 	if (q->qdisc)
7711da177e4SLinus Torvalds 		qdisc_reset(q->qdisc);
77259cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
7731da177e4SLinus Torvalds }
7741da177e4SLinus Torvalds 
7756373a9a2Sstephen hemminger static void dist_free(struct disttable *d)
7766373a9a2Sstephen hemminger {
7774cb28970SWANG Cong 	kvfree(d);
7786373a9a2Sstephen hemminger }
7796373a9a2Sstephen hemminger 
7801da177e4SLinus Torvalds /*
7811da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
7821da177e4SLinus Torvalds  * signed 16 bit values.
7831da177e4SLinus Torvalds  */
784836af83bSDave Taht 
78511b73313SEric Dumazet static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
7861da177e4SLinus Torvalds {
7876373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
7881e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
7891da177e4SLinus Torvalds 	struct disttable *d;
7901da177e4SLinus Torvalds 	int i;
7911da177e4SLinus Torvalds 
792b41d936bSEric Dumazet 	if (!n || n > NETEM_DIST_MAX)
7931da177e4SLinus Torvalds 		return -EINVAL;
7941da177e4SLinus Torvalds 
79512929198SGustavo A. R. Silva 	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
7961da177e4SLinus Torvalds 	if (!d)
7971da177e4SLinus Torvalds 		return -ENOMEM;
7981da177e4SLinus Torvalds 
7991da177e4SLinus Torvalds 	d->size = n;
8001da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
8011da177e4SLinus Torvalds 		d->table[i] = data[i];
8021da177e4SLinus Torvalds 
80311b73313SEric Dumazet 	*tbl = d;
8041da177e4SLinus Torvalds 	return 0;
8051da177e4SLinus Torvalds }
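/* Note (assuming standard iproute2 usage): the table is a flat array of
 * signed 16-bit samples, at most NETEM_DIST_MAX entries, consumed by
 * tabledist() above in units of NETEM_DIST_SCALE.  tc typically loads it
 * from a distribution file such as normal, pareto or paretonormal and
 * passes it as TCA_NETEM_DELAY_DIST (or TCA_NETEM_SLOT_DIST).
 */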
8061da177e4SLinus Torvalds 
807836af83bSDave Taht static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
808836af83bSDave Taht {
809836af83bSDave Taht 	const struct tc_netem_slot *c = nla_data(attr);
810836af83bSDave Taht 
811836af83bSDave Taht 	q->slot_config = *c;
812836af83bSDave Taht 	if (q->slot_config.max_packets == 0)
813836af83bSDave Taht 		q->slot_config.max_packets = INT_MAX;
814836af83bSDave Taht 	if (q->slot_config.max_bytes == 0)
815836af83bSDave Taht 		q->slot_config.max_bytes = INT_MAX;
816eadd1befSAleksandr Nogikh 
817eadd1befSAleksandr Nogikh 	/* capping dist_jitter to the range acceptable by tabledist() */
818eadd1befSAleksandr Nogikh 	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
819eadd1befSAleksandr Nogikh 
820836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
821836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
8220a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
8230a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter)
824836af83bSDave Taht 		q->slot.slot_next = ktime_get_ns();
825836af83bSDave Taht 	else
826836af83bSDave Taht 		q->slot.slot_next = 0;
827836af83bSDave Taht }
828836af83bSDave Taht 
82949545a77SYang Yingliang static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
8301da177e4SLinus Torvalds {
8311e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
8321da177e4SLinus Torvalds 
8331da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
8341da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
8351da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
8361da177e4SLinus Torvalds }
8371da177e4SLinus Torvalds 
83849545a77SYang Yingliang static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
8390dca51d3SStephen Hemminger {
8401e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
8410dca51d3SStephen Hemminger 
8420dca51d3SStephen Hemminger 	q->reorder = r->probability;
8430dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
8440dca51d3SStephen Hemminger }
8450dca51d3SStephen Hemminger 
84649545a77SYang Yingliang static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
847c865e5d9SStephen Hemminger {
8481e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
849c865e5d9SStephen Hemminger 
850c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
851c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
852c865e5d9SStephen Hemminger }
853c865e5d9SStephen Hemminger 
85449545a77SYang Yingliang static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
8557bc0f28cSHagen Paul Pfeifer {
8567bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
8577bc0f28cSHagen Paul Pfeifer 
8587bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
85990b41a1cSHagen Paul Pfeifer 	q->packet_overhead = r->packet_overhead;
86090b41a1cSHagen Paul Pfeifer 	q->cell_size = r->cell_size;
861809fa972SHannes Frederic Sowa 	q->cell_overhead = r->cell_overhead;
86290b41a1cSHagen Paul Pfeifer 	if (q->cell_size)
86390b41a1cSHagen Paul Pfeifer 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
864809fa972SHannes Frederic Sowa 	else
865809fa972SHannes Frederic Sowa 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
8667bc0f28cSHagen Paul Pfeifer }
8677bc0f28cSHagen Paul Pfeifer 
86849545a77SYang Yingliang static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
869661b7972Sstephen hemminger {
870661b7972Sstephen hemminger 	const struct nlattr *la;
871661b7972Sstephen hemminger 	int rem;
872661b7972Sstephen hemminger 
873661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
874661b7972Sstephen hemminger 		u16 type = nla_type(la);
875661b7972Sstephen hemminger 
876661b7972Sstephen hemminger 		switch (type) {
877661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
878661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
879661b7972Sstephen hemminger 
8802494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
881661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
882661b7972Sstephen hemminger 				return -EINVAL;
883661b7972Sstephen hemminger 			}
884661b7972Sstephen hemminger 
885661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
886661b7972Sstephen hemminger 
8873fbac2a8SYang Yingliang 			q->clg.state = TX_IN_GAP_PERIOD;
888661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
889661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
890661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
891661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
892661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
893661b7972Sstephen hemminger 			break;
894661b7972Sstephen hemminger 		}
895661b7972Sstephen hemminger 
896661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
897661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
898661b7972Sstephen hemminger 
8992494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
9002494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
901661b7972Sstephen hemminger 				return -EINVAL;
902661b7972Sstephen hemminger 			}
903661b7972Sstephen hemminger 
904661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
9053fbac2a8SYang Yingliang 			q->clg.state = GOOD_STATE;
906661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
907661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
908661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
909661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
910661b7972Sstephen hemminger 			break;
911661b7972Sstephen hemminger 		}
912661b7972Sstephen hemminger 
913661b7972Sstephen hemminger 		default:
914661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
915661b7972Sstephen hemminger 			return -EINVAL;
916661b7972Sstephen hemminger 		}
917661b7972Sstephen hemminger 	}
918661b7972Sstephen hemminger 
919661b7972Sstephen hemminger 	return 0;
920661b7972Sstephen hemminger }
921661b7972Sstephen hemminger 
92227a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
92327a3421eSPatrick McHardy 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
92427a3421eSPatrick McHardy 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
92527a3421eSPatrick McHardy 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
9267bc0f28cSHagen Paul Pfeifer 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
927661b7972Sstephen hemminger 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
928e4ae004bSEric Dumazet 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
9296a031f67SYang Yingliang 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
93099803171SDave Taht 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
93199803171SDave Taht 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
932836af83bSDave Taht 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
9334072d97dSFrançois Michel 	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
93427a3421eSPatrick McHardy };
93527a3421eSPatrick McHardy 
9362c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
9372c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
9382c10b32bSThomas Graf {
9392c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
9402c10b32bSThomas Graf 
941661b7972Sstephen hemminger 	if (nested_len < 0) {
942661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
9432c10b32bSThomas Graf 		return -EINVAL;
944661b7972Sstephen hemminger 	}
945661b7972Sstephen hemminger 
9462c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
9478cb08174SJohannes Berg 		return nla_parse_deprecated(tb, maxtype,
9488cb08174SJohannes Berg 					    nla_data(nla) + NLA_ALIGN(len),
949fceb6435SJohannes Berg 					    nested_len, policy, NULL);
950661b7972Sstephen hemminger 
9512c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
9522c10b32bSThomas Graf 	return 0;
9532c10b32bSThomas Graf }
9542c10b32bSThomas Graf 
955c865e5d9SStephen Hemminger /* Parse netlink message to set options */
9562030721cSAlexander Aring static int netem_change(struct Qdisc *sch, struct nlattr *opt,
9572030721cSAlexander Aring 			struct netlink_ext_ack *extack)
9581da177e4SLinus Torvalds {
9591da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
960b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
96111b73313SEric Dumazet 	struct disttable *delay_dist = NULL;
96211b73313SEric Dumazet 	struct disttable *slot_dist = NULL;
9631da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
96454a4b05cSYang Yingliang 	struct clgstate old_clg;
96554a4b05cSYang Yingliang 	int old_loss_model = CLG_RANDOM;
9661da177e4SLinus Torvalds 	int ret;
9671da177e4SLinus Torvalds 
9682c10b32bSThomas Graf 	qopt = nla_data(opt);
9692c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
970b03f4672SPatrick McHardy 	if (ret < 0)
971b03f4672SPatrick McHardy 		return ret;
972b03f4672SPatrick McHardy 
97311b73313SEric Dumazet 	if (tb[TCA_NETEM_DELAY_DIST]) {
97411b73313SEric Dumazet 		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
97511b73313SEric Dumazet 		if (ret)
97611b73313SEric Dumazet 			goto table_free;
97711b73313SEric Dumazet 	}
97811b73313SEric Dumazet 
97911b73313SEric Dumazet 	if (tb[TCA_NETEM_SLOT_DIST]) {
98011b73313SEric Dumazet 		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
98111b73313SEric Dumazet 		if (ret)
98211b73313SEric Dumazet 			goto table_free;
98311b73313SEric Dumazet 	}
98411b73313SEric Dumazet 
9852174a08dSEric Dumazet 	sch_tree_lock(sch);
98654a4b05cSYang Yingliang 	/* backup q->clg and q->loss_model */
98754a4b05cSYang Yingliang 	old_clg = q->clg;
98854a4b05cSYang Yingliang 	old_loss_model = q->loss_model;
98954a4b05cSYang Yingliang 
99054a4b05cSYang Yingliang 	if (tb[TCA_NETEM_LOSS]) {
99149545a77SYang Yingliang 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
99254a4b05cSYang Yingliang 		if (ret) {
99354a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
99411b73313SEric Dumazet 			q->clg = old_clg;
9952174a08dSEric Dumazet 			goto unlock;
99654a4b05cSYang Yingliang 		}
99754a4b05cSYang Yingliang 	} else {
99854a4b05cSYang Yingliang 		q->loss_model = CLG_RANDOM;
99954a4b05cSYang Yingliang 	}
100054a4b05cSYang Yingliang 
100111b73313SEric Dumazet 	if (delay_dist)
100211b73313SEric Dumazet 		swap(q->delay_dist, delay_dist);
100311b73313SEric Dumazet 	if (slot_dist)
100411b73313SEric Dumazet 		swap(q->slot_dist, slot_dist);
100550612537SEric Dumazet 	sch->limit = qopt->limit;
10061da177e4SLinus Torvalds 
1007112f9cb6SDave Taht 	q->latency = PSCHED_TICKS2NS(qopt->latency);
1008112f9cb6SDave Taht 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
10091da177e4SLinus Torvalds 	q->limit = qopt->limit;
10101da177e4SLinus Torvalds 	q->gap = qopt->gap;
10110dca51d3SStephen Hemminger 	q->counter = 0;
10121da177e4SLinus Torvalds 	q->loss = qopt->loss;
10131da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
10141da177e4SLinus Torvalds 
1015bb2f8cc0SStephen Hemminger 	/* for compatibility with earlier versions.
1016bb2f8cc0SStephen Hemminger 	 * if gap is set, need to assume 100% probability
10170dca51d3SStephen Hemminger 	 */
1018a362e0a7SStephen Hemminger 	if (q->gap)
10190dca51d3SStephen Hemminger 		q->reorder = ~0;
10200dca51d3SStephen Hemminger 
1021265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
102249545a77SYang Yingliang 		get_correlation(q, tb[TCA_NETEM_CORR]);
10231da177e4SLinus Torvalds 
1024265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
102549545a77SYang Yingliang 		get_reorder(q, tb[TCA_NETEM_REORDER]);
10261da177e4SLinus Torvalds 
1027265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
102849545a77SYang Yingliang 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
10291da177e4SLinus Torvalds 
10307bc0f28cSHagen Paul Pfeifer 	if (tb[TCA_NETEM_RATE])
103149545a77SYang Yingliang 		get_rate(q, tb[TCA_NETEM_RATE]);
10327bc0f28cSHagen Paul Pfeifer 
10336a031f67SYang Yingliang 	if (tb[TCA_NETEM_RATE64])
10346a031f67SYang Yingliang 		q->rate = max_t(u64, q->rate,
10356a031f67SYang Yingliang 				nla_get_u64(tb[TCA_NETEM_RATE64]));
10366a031f67SYang Yingliang 
103799803171SDave Taht 	if (tb[TCA_NETEM_LATENCY64])
103899803171SDave Taht 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
103999803171SDave Taht 
104099803171SDave Taht 	if (tb[TCA_NETEM_JITTER64])
104199803171SDave Taht 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
104299803171SDave Taht 
1043e4ae004bSEric Dumazet 	if (tb[TCA_NETEM_ECN])
1044e4ae004bSEric Dumazet 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1045e4ae004bSEric Dumazet 
1046836af83bSDave Taht 	if (tb[TCA_NETEM_SLOT])
1047836af83bSDave Taht 		get_slot(q, tb[TCA_NETEM_SLOT]);
1048836af83bSDave Taht 
1049eadd1befSAleksandr Nogikh 	/* capping jitter to the range acceptable by tabledist() */
1050eadd1befSAleksandr Nogikh 	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
1051eadd1befSAleksandr Nogikh 
10524072d97dSFrançois Michel 	if (tb[TCA_NETEM_PRNG_SEED])
10534072d97dSFrançois Michel 		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
10544072d97dSFrançois Michel 	else
10554072d97dSFrançois Michel 		q->prng.seed = get_random_u64();
10564072d97dSFrançois Michel 	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
10574072d97dSFrançois Michel 
10582174a08dSEric Dumazet unlock:
10592174a08dSEric Dumazet 	sch_tree_unlock(sch);
106011b73313SEric Dumazet 
106111b73313SEric Dumazet table_free:
106211b73313SEric Dumazet 	dist_free(delay_dist);
106311b73313SEric Dumazet 	dist_free(slot_dist);
1064661b7972Sstephen hemminger 	return ret;
10651da177e4SLinus Torvalds }
10661da177e4SLinus Torvalds 
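/* Initialise the watchdog timer and apply the initial configuration via
 * netem_change(); netem cannot be created without options.
 */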
1067e63d7dfdSAlexander Aring static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1068e63d7dfdSAlexander Aring 		      struct netlink_ext_ack *extack)
10691da177e4SLinus Torvalds {
10701da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10711da177e4SLinus Torvalds 	int ret;
10721da177e4SLinus Torvalds 
1073634576a1SNikolay Aleksandrov 	qdisc_watchdog_init(&q->watchdog, sch);
1074634576a1SNikolay Aleksandrov 
10751da177e4SLinus Torvalds 	if (!opt)
10761da177e4SLinus Torvalds 		return -EINVAL;
10771da177e4SLinus Torvalds 
1078661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
10792030721cSAlexander Aring 	ret = netem_change(sch, opt, extack);
108050612537SEric Dumazet 	if (ret)
1081250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
10821da177e4SLinus Torvalds 	return ret;
10831da177e4SLinus Torvalds }
10841da177e4SLinus Torvalds 
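/* Tear down: cancel the watchdog timer, release the child qdisc if one
 * was attached, and free any loaded delay/slot distribution tables.
 */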
10851da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
10861da177e4SLinus Torvalds {
10871da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10881da177e4SLinus Torvalds 
108959cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
109050612537SEric Dumazet 	if (q->qdisc)
109186bd446bSVlad Buslov 		qdisc_put(q->qdisc);
10926373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
10930a9fe5c3SYousuk Seung 	dist_free(q->slot_dist);
10941da177e4SLinus Torvalds }
10951da177e4SLinus Torvalds 
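/* Emit the configured loss model as a nested TCA_NETEM_LOSS attribute;
 * the legacy random model is represented by omitting the nest entirely.
 */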
1096661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
1097661b7972Sstephen hemminger 			   struct sk_buff *skb)
1098661b7972Sstephen hemminger {
1099661b7972Sstephen hemminger 	struct nlattr *nest;
1100661b7972Sstephen hemminger 
1101ae0be8deSMichal Kubecek 	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
1102661b7972Sstephen hemminger 	if (nest == NULL)
1103661b7972Sstephen hemminger 		goto nla_put_failure;
1104661b7972Sstephen hemminger 
1105661b7972Sstephen hemminger 	switch (q->loss_model) {
1106661b7972Sstephen hemminger 	case CLG_RANDOM:
1107661b7972Sstephen hemminger 		/* legacy loss model */
1108661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
1109661b7972Sstephen hemminger 		return 0;	/* no data */
1110661b7972Sstephen hemminger 
1111661b7972Sstephen hemminger 	case CLG_4_STATES: {
1112661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
1113661b7972Sstephen hemminger 			.p13 = q->clg.a1,
1114661b7972Sstephen hemminger 			.p31 = q->clg.a2,
1115661b7972Sstephen hemminger 			.p32 = q->clg.a3,
1116661b7972Sstephen hemminger 			.p14 = q->clg.a4,
1117661b7972Sstephen hemminger 			.p23 = q->clg.a5,
1118661b7972Sstephen hemminger 		};
1119661b7972Sstephen hemminger 
11201b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
11211b34ec43SDavid S. Miller 			goto nla_put_failure;
1122661b7972Sstephen hemminger 		break;
1123661b7972Sstephen hemminger 	}
1124661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
1125661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
1126661b7972Sstephen hemminger 			.p = q->clg.a1,
1127661b7972Sstephen hemminger 			.r = q->clg.a2,
1128661b7972Sstephen hemminger 			.h = q->clg.a3,
1129661b7972Sstephen hemminger 			.k1 = q->clg.a4,
1130661b7972Sstephen hemminger 		};
1131661b7972Sstephen hemminger 
11321b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
11331b34ec43SDavid S. Miller 			goto nla_put_failure;
1134661b7972Sstephen hemminger 		break;
1135661b7972Sstephen hemminger 	}
1136661b7972Sstephen hemminger 	}
1137661b7972Sstephen hemminger 
1138661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
1139661b7972Sstephen hemminger 	return 0;
1140661b7972Sstephen hemminger 
1141661b7972Sstephen hemminger nla_put_failure:
1142661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
1143661b7972Sstephen hemminger 	return -1;
1144661b7972Sstephen hemminger }
1145661b7972Sstephen hemminger 
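/* Dump the current configuration to user space; latency and jitter are
 * reported both in the legacy 32-bit tick fields and as 64-bit
 * nanosecond attributes.
 */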
11461da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
11471da177e4SLinus Torvalds {
11481da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
1149861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
11501da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
11511da177e4SLinus Torvalds 	struct tc_netem_corr cor;
11520dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
1153c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
11547bc0f28cSHagen Paul Pfeifer 	struct tc_netem_rate rate;
1155836af83bSDave Taht 	struct tc_netem_slot slot;
11561da177e4SLinus Torvalds 
1157a2b1a5d4SPeilin Ye 	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
1158112f9cb6SDave Taht 			     UINT_MAX);
1159a2b1a5d4SPeilin Ye 	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
1160112f9cb6SDave Taht 			    UINT_MAX);
11611da177e4SLinus Torvalds 	qopt.limit = q->limit;
11621da177e4SLinus Torvalds 	qopt.loss = q->loss;
11631da177e4SLinus Torvalds 	qopt.gap = q->gap;
11641da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
11651b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
11661b34ec43SDavid S. Miller 		goto nla_put_failure;
11671da177e4SLinus Torvalds 
116899803171SDave Taht 	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
116999803171SDave Taht 		goto nla_put_failure;
117099803171SDave Taht 
117199803171SDave Taht 	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
117299803171SDave Taht 		goto nla_put_failure;
117399803171SDave Taht 
11741da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
11751da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
11761da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
11771b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
11781b34ec43SDavid S. Miller 		goto nla_put_failure;
11790dca51d3SStephen Hemminger 
11800dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
11810dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
11821b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
11831b34ec43SDavid S. Miller 		goto nla_put_failure;
11840dca51d3SStephen Hemminger 
1185c865e5d9SStephen Hemminger 	corrupt.probability = q->corrupt;
1186c865e5d9SStephen Hemminger 	corrupt.correlation = q->corrupt_cor.rho;
11871b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
11881b34ec43SDavid S. Miller 		goto nla_put_failure;
1189c865e5d9SStephen Hemminger 
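	/* Rates of 2^32 or more are dumped via TCA_NETEM_RATE64 and the
	 * legacy 32-bit field is saturated to ~0U.
	 */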
11906a031f67SYang Yingliang 	if (q->rate >= (1ULL << 32)) {
11912a51c1e8SNicolas Dichtel 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
11922a51c1e8SNicolas Dichtel 				      TCA_NETEM_PAD))
11936a031f67SYang Yingliang 			goto nla_put_failure;
11946a031f67SYang Yingliang 		rate.rate = ~0U;
11956a031f67SYang Yingliang 	} else {
11967bc0f28cSHagen Paul Pfeifer 		rate.rate = q->rate;
11976a031f67SYang Yingliang 	}
119890b41a1cSHagen Paul Pfeifer 	rate.packet_overhead = q->packet_overhead;
119990b41a1cSHagen Paul Pfeifer 	rate.cell_size = q->cell_size;
120090b41a1cSHagen Paul Pfeifer 	rate.cell_overhead = q->cell_overhead;
12011b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
12021b34ec43SDavid S. Miller 		goto nla_put_failure;
12037bc0f28cSHagen Paul Pfeifer 
1204e4ae004bSEric Dumazet 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1205e4ae004bSEric Dumazet 		goto nla_put_failure;
1206e4ae004bSEric Dumazet 
1207661b7972Sstephen hemminger 	if (dump_loss_model(q, skb) != 0)
1208661b7972Sstephen hemminger 		goto nla_put_failure;
1209661b7972Sstephen hemminger 
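	/* Only dump slot parameters when slotting is actually configured;
	 * INT_MAX limits are reported as 0 (i.e. unlimited).
	 */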
12100a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
12110a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter) {
1212836af83bSDave Taht 		slot = q->slot_config;
1213836af83bSDave Taht 		if (slot.max_packets == INT_MAX)
1214836af83bSDave Taht 			slot.max_packets = 0;
1215836af83bSDave Taht 		if (slot.max_bytes == INT_MAX)
1216836af83bSDave Taht 			slot.max_bytes = 0;
1217836af83bSDave Taht 		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1218836af83bSDave Taht 			goto nla_put_failure;
1219836af83bSDave Taht 	}
1220836af83bSDave Taht 
12214072d97dSFrançois Michel 	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
12224072d97dSFrançois Michel 			      TCA_NETEM_PAD))
12234072d97dSFrançois Michel 		goto nla_put_failure;
12244072d97dSFrançois Michel 
1225861d7f74Sstephen hemminger 	return nla_nest_end(skb, nla);
12261da177e4SLinus Torvalds 
12271e90474cSPatrick McHardy nla_put_failure:
1228861d7f74Sstephen hemminger 	nlmsg_trim(skb, nla);
12291da177e4SLinus Torvalds 	return -1;
12301da177e4SLinus Torvalds }
12311da177e4SLinus Torvalds 
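/* netem presents exactly one class (minor 1); the class ops below are
 * the minimal set needed so a child qdisc can be attached and dumped.
 */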
123210f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
123310f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
123410f6dfcfSstephen hemminger {
123510f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
123610f6dfcfSstephen hemminger 
123750612537SEric Dumazet 	if (cl != 1 || !q->qdisc) 	/* only one class */
123810f6dfcfSstephen hemminger 		return -ENOENT;
123910f6dfcfSstephen hemminger 
124010f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
124110f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
124210f6dfcfSstephen hemminger 
124310f6dfcfSstephen hemminger 	return 0;
124410f6dfcfSstephen hemminger }
124510f6dfcfSstephen hemminger 
124610f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1247653d6fd6SAlexander Aring 		     struct Qdisc **old, struct netlink_ext_ack *extack)
124810f6dfcfSstephen hemminger {
124910f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
125010f6dfcfSstephen hemminger 
125186a7996cSWANG Cong 	*old = qdisc_replace(sch, new, &q->qdisc);
125210f6dfcfSstephen hemminger 	return 0;
125310f6dfcfSstephen hemminger }
125410f6dfcfSstephen hemminger 
125510f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
125610f6dfcfSstephen hemminger {
125710f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
125810f6dfcfSstephen hemminger 	return q->qdisc;
125910f6dfcfSstephen hemminger }
126010f6dfcfSstephen hemminger 
1261143976ceSWANG Cong static unsigned long netem_find(struct Qdisc *sch, u32 classid)
126210f6dfcfSstephen hemminger {
126310f6dfcfSstephen hemminger 	return 1;
126410f6dfcfSstephen hemminger }
126510f6dfcfSstephen hemminger 
126610f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
126710f6dfcfSstephen hemminger {
126810f6dfcfSstephen hemminger 	if (!walker->stop) {
1269e046fa89SZhengchao Shao 		if (!tc_qdisc_stats_dump(sch, 1, walker))
127010f6dfcfSstephen hemminger 			return;
127110f6dfcfSstephen hemminger 	}
127210f6dfcfSstephen hemminger }
127310f6dfcfSstephen hemminger 
127410f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = {
127510f6dfcfSstephen hemminger 	.graft		=	netem_graft,
127610f6dfcfSstephen hemminger 	.leaf		=	netem_leaf,
1277143976ceSWANG Cong 	.find		=	netem_find,
127810f6dfcfSstephen hemminger 	.walk		=	netem_walk,
127910f6dfcfSstephen hemminger 	.dump		=	netem_dump_class,
128010f6dfcfSstephen hemminger };
128110f6dfcfSstephen hemminger 
128220fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
12831da177e4SLinus Torvalds 	.id		=	"netem",
128410f6dfcfSstephen hemminger 	.cl_ops		=	&netem_class_ops,
12851da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
12861da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
12871da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
128877be155cSJarek Poplawski 	.peek		=	qdisc_peek_dequeued,
12891da177e4SLinus Torvalds 	.init		=	netem_init,
12901da177e4SLinus Torvalds 	.reset		=	netem_reset,
12911da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
12921da177e4SLinus Torvalds 	.change		=	netem_change,
12931da177e4SLinus Torvalds 	.dump		=	netem_dump,
12941da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
12951da177e4SLinus Torvalds };
12961da177e4SLinus Torvalds 
12971da177e4SLinus Torvalds 
12981da177e4SLinus Torvalds static int __init netem_module_init(void)
12991da177e4SLinus Torvalds {
1300eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
13011da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
13021da177e4SLinus Torvalds }
13031da177e4SLinus Torvalds static void __exit netem_module_exit(void)
13041da177e4SLinus Torvalds {
13051da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
13061da177e4SLinus Torvalds }
13071da177e4SLinus Torvalds module_init(netem_module_init)
13081da177e4SLinus Torvalds module_exit(netem_module_exit)
13091da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1310