xref: /openbmc/linux/net/sched/sch_netem.c (revision 4072d97d)
184a14ae8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
61da177e4SLinus Torvalds  *		NIST Net, which is not copyrighted.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
91da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
101da177e4SLinus Torvalds  */
111da177e4SLinus Torvalds 
12b7f080cfSAlexey Dobriyan #include <linux/mm.h>
131da177e4SLinus Torvalds #include <linux/module.h>
145a0e3ad6STejun Heo #include <linux/slab.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <linux/errno.h>
181da177e4SLinus Torvalds #include <linux/skbuff.h>
1978776d3fSDavid S. Miller #include <linux/vmalloc.h>
201da177e4SLinus Torvalds #include <linux/rtnetlink.h>
2190b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h>
22aec0a40aSEric Dumazet #include <linux/rbtree.h>
231da177e4SLinus Torvalds 
24d457a0e3SEric Dumazet #include <net/gso.h>
25dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
261da177e4SLinus Torvalds #include <net/pkt_sched.h>
27e4ae004bSEric Dumazet #include <net/inet_ecn.h>
281da177e4SLinus Torvalds 
29250a65f7Sstephen hemminger #define VERSION "1.3"
30eb229c4cSStephen Hemminger 
311da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
321da177e4SLinus Torvalds 	====================================
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
351da177e4SLinus Torvalds 		 Network Emulation Tool"
361da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds 	 ----------------------------------------------------------------
391da177e4SLinus Torvalds 
401da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
411da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
421da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
431da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
441da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
451da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
461da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds 	 This qdisc does not do classification; that can be handled by
491da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
501da177e4SLinus Torvalds 	 control either, since that can be handled by using token
511da177e4SLinus Torvalds 	 bucket or other rate control.
52661b7972Sstephen hemminger 
53661b7972Sstephen hemminger      Correlated Loss Generator models
54661b7972Sstephen hemminger 
55661b7972Sstephen hemminger 	Added generation of correlated loss according to the
56661b7972Sstephen hemminger 	"Gilbert-Elliot" model, a 4-state Markov model.
57661b7972Sstephen hemminger 
58661b7972Sstephen hemminger 	References:
59661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
60661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
61661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
62661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
63661b7972Sstephen hemminger 
64661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
65661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
661da177e4SLinus Torvalds */
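
/* Illustrative configuration via the iproute2 "tc" front end (an example
 * added for orientation, not taken from this file; see tc-netem(8) for the
 * authoritative option syntax):
 *
 *	tc qdisc add dev eth0 root netem \
 *		delay 100ms 10ms 25% \
 *		loss 0.3% 25% \
 *		duplicate 1% corrupt 0.1% \
 *		reorder 25% 50% gap 5 \
 *		rate 1mbit
 *
 * which maps onto the latency/jitter, loss, duplicate, corrupt, reorder
 * and rate fields of struct netem_sched_data below.
 */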
671da177e4SLinus Torvalds 
680a9fe5c3SYousuk Seung struct disttable {
690a9fe5c3SYousuk Seung 	u32  size;
70b90feaffSGustavo A. R. Silva 	s16 table[];
710a9fe5c3SYousuk Seung };
720a9fe5c3SYousuk Seung 
731da177e4SLinus Torvalds struct netem_sched_data {
74aec0a40aSEric Dumazet 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
75aec0a40aSEric Dumazet 	struct rb_root t_root;
7650612537SEric Dumazet 
77d66280b1SPeter Oskolkov 	/* a linear queue; reduces rbtree rebalancing when jitter is low */
78d66280b1SPeter Oskolkov 	struct sk_buff	*t_head;
79d66280b1SPeter Oskolkov 	struct sk_buff	*t_tail;
80d66280b1SPeter Oskolkov 
8150612537SEric Dumazet 	/* optional qdisc for classful handling (NULL at netem init) */
821da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
8350612537SEric Dumazet 
8459cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
851da177e4SLinus Torvalds 
86112f9cb6SDave Taht 	s64 latency;
87112f9cb6SDave Taht 	s64 jitter;
88b407621cSStephen Hemminger 
891da177e4SLinus Torvalds 	u32 loss;
90e4ae004bSEric Dumazet 	u32 ecn;
911da177e4SLinus Torvalds 	u32 limit;
921da177e4SLinus Torvalds 	u32 counter;
931da177e4SLinus Torvalds 	u32 gap;
941da177e4SLinus Torvalds 	u32 duplicate;
950dca51d3SStephen Hemminger 	u32 reorder;
96c865e5d9SStephen Hemminger 	u32 corrupt;
976a031f67SYang Yingliang 	u64 rate;
9890b41a1cSHagen Paul Pfeifer 	s32 packet_overhead;
9990b41a1cSHagen Paul Pfeifer 	u32 cell_size;
100809fa972SHannes Frederic Sowa 	struct reciprocal_value cell_size_reciprocal;
10190b41a1cSHagen Paul Pfeifer 	s32 cell_overhead;
1021da177e4SLinus Torvalds 
1031da177e4SLinus Torvalds 	struct crndstate {
104b407621cSStephen Hemminger 		u32 last;
105b407621cSStephen Hemminger 		u32 rho;
106c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
1071da177e4SLinus Torvalds 
108*4072d97dSFrançois Michel 	struct prng  {
109*4072d97dSFrançois Michel 		u64 seed;
110*4072d97dSFrançois Michel 		struct rnd_state prng_state;
111*4072d97dSFrançois Michel 	} prng;
112*4072d97dSFrançois Michel 
1130a9fe5c3SYousuk Seung 	struct disttable *delay_dist;
114661b7972Sstephen hemminger 
115661b7972Sstephen hemminger 	enum  {
116661b7972Sstephen hemminger 		CLG_RANDOM,
117661b7972Sstephen hemminger 		CLG_4_STATES,
118661b7972Sstephen hemminger 		CLG_GILB_ELL,
119661b7972Sstephen hemminger 	} loss_model;
120661b7972Sstephen hemminger 
121a6e2fe17SYang Yingliang 	enum {
122a6e2fe17SYang Yingliang 		TX_IN_GAP_PERIOD = 1,
123a6e2fe17SYang Yingliang 		TX_IN_BURST_PERIOD,
124a6e2fe17SYang Yingliang 		LOST_IN_GAP_PERIOD,
125a6e2fe17SYang Yingliang 		LOST_IN_BURST_PERIOD,
126a6e2fe17SYang Yingliang 	} _4_state_model;
127a6e2fe17SYang Yingliang 
128c045a734SYang Yingliang 	enum {
129c045a734SYang Yingliang 		GOOD_STATE = 1,
130c045a734SYang Yingliang 		BAD_STATE,
131c045a734SYang Yingliang 	} GE_state_model;
132c045a734SYang Yingliang 
133661b7972Sstephen hemminger 	/* Correlated Loss Generation models */
134661b7972Sstephen hemminger 	struct clgstate {
135661b7972Sstephen hemminger 		/* state of the Markov chain */
136661b7972Sstephen hemminger 		u8 state;
137661b7972Sstephen hemminger 
138661b7972Sstephen hemminger 		/* 4-states and Gilbert-Elliot models */
139661b7972Sstephen hemminger 		u32 a1;	/* p13 for 4-states or p for GE */
140661b7972Sstephen hemminger 		u32 a2;	/* p31 for 4-states or r for GE */
141661b7972Sstephen hemminger 		u32 a3;	/* p32 for 4-states or h for GE */
142661b7972Sstephen hemminger 		u32 a4;	/* p14 for 4-states or 1-k for GE */
143661b7972Sstephen hemminger 		u32 a5; /* p23 used only in 4-states */
144661b7972Sstephen hemminger 	} clg;
145661b7972Sstephen hemminger 
146836af83bSDave Taht 	struct tc_netem_slot slot_config;
147836af83bSDave Taht 	struct slotstate {
148836af83bSDave Taht 		u64 slot_next;
149836af83bSDave Taht 		s32 packets_left;
150836af83bSDave Taht 		s32 bytes_left;
151836af83bSDave Taht 	} slot;
152836af83bSDave Taht 
1530a9fe5c3SYousuk Seung 	struct disttable *slot_dist;
1541da177e4SLinus Torvalds };
1551da177e4SLinus Torvalds 
15650612537SEric Dumazet /* Time stamp put into socket buffer control block
15750612537SEric Dumazet  * Only valid when skbs are in our internal t(ime)fifo queue.
15856b17425SEric Dumazet  *
15956b17425SEric Dumazet  * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
16056b17425SEric Dumazet  * and skb->next & skb->prev are scratch space for a qdisc,
16156b17425SEric Dumazet  * we save skb->tstamp value in skb->cb[] before destroying it.
16250612537SEric Dumazet  */
1631da177e4SLinus Torvalds struct netem_skb_cb {
164112f9cb6SDave Taht 	u64	        time_to_send;
1651da177e4SLinus Torvalds };
1661da177e4SLinus Torvalds 
1675f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
1685f86173bSJussi Kivilinna {
169aec0a40aSEric Dumazet 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
17016bda13dSDavid S. Miller 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
171175f9c1bSJussi Kivilinna 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
1725f86173bSJussi Kivilinna }
1735f86173bSJussi Kivilinna 
1741da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1751da177e4SLinus Torvalds  * Use entropy source for initial seed.
1761da177e4SLinus Torvalds  */
1771da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1781da177e4SLinus Torvalds {
1791da177e4SLinus Torvalds 	state->rho = rho;
180a251c17aSJason A. Donenfeld 	state->last = get_random_u32();
1811da177e4SLinus Torvalds }
1821da177e4SLinus Torvalds 
1831da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1841da177e4SLinus Torvalds  * Next number depends on last value.
1851da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1861da177e4SLinus Torvalds  */
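/* Worked example (illustrative numbers, not from the original comment):
 * rho is a fixed-point fraction of 2^32, so the update below computes
 * roughly answer = (1 - rho) * random + rho * last.  With
 * state->rho == 0x80000000 (~0.5) each output is about the average of a
 * fresh get_random_u32() draw and the previous output, giving positively
 * correlated samples; rho == 0 degenerates to plain get_random_u32().
 */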
187b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1881da177e4SLinus Torvalds {
1891da177e4SLinus Torvalds 	u64 value, rho;
1901da177e4SLinus Torvalds 	unsigned long answer;
1911da177e4SLinus Torvalds 
1920a9fe5c3SYousuk Seung 	if (!state || state->rho == 0)	/* no correlation */
193a251c17aSJason A. Donenfeld 		return get_random_u32();
1941da177e4SLinus Torvalds 
195a251c17aSJason A. Donenfeld 	value = get_random_u32();
1961da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1971da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1981da177e4SLinus Torvalds 	state->last = answer;
1991da177e4SLinus Torvalds 	return answer;
2001da177e4SLinus Torvalds }
2011da177e4SLinus Torvalds 
202661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
203661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
204661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
205661b7972Sstephen hemminger  */
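/* Note added for clarity (an assumption based on the q->loss convention
 * documented in loss_event() below): the clg.a1..a5 transition
 * probabilities are fixed-point u32 values where ~0U represents 1.0,
 * so e.g. a4 == 0x19999999 corresponds to roughly a 10% p14 transition.
 */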
206661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
207661b7972Sstephen hemminger {
208661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
209a251c17aSJason A. Donenfeld 	u32 rnd = get_random_u32();
210661b7972Sstephen hemminger 
211661b7972Sstephen hemminger 	/*
21225985edcSLucas De Marchi 	 * Compares rnd with the transition probabilities out of the
213661b7972Sstephen hemminger 	 * current state, then decides the next state and whether the
214661b7972Sstephen hemminger 	 * next packet is transmitted or lost.
215661b7972Sstephen hemminger 	 * The four states correspond to:
216a6e2fe17SYang Yingliang 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
217cb3ef7b0SHarshit Mogalapalli 	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
218cb3ef7b0SHarshit Mogalapalli 	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
219cb3ef7b0SHarshit Mogalapalli 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
220661b7972Sstephen hemminger 	 */
221661b7972Sstephen hemminger 	switch (clg->state) {
222a6e2fe17SYang Yingliang 	case TX_IN_GAP_PERIOD:
223661b7972Sstephen hemminger 		if (rnd < clg->a4) {
224cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_GAP_PERIOD;
225661b7972Sstephen hemminger 			return true;
226ab6c27beSstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
227cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_BURST_PERIOD;
228661b7972Sstephen hemminger 			return true;
229a6e2fe17SYang Yingliang 		} else if (clg->a1 + clg->a4 < rnd) {
230a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
231a6e2fe17SYang Yingliang 		}
232661b7972Sstephen hemminger 
233661b7972Sstephen hemminger 		break;
234a6e2fe17SYang Yingliang 	case TX_IN_BURST_PERIOD:
235661b7972Sstephen hemminger 		if (rnd < clg->a5) {
236cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_BURST_PERIOD;
237661b7972Sstephen hemminger 			return true;
238a6e2fe17SYang Yingliang 		} else {
239a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
240a6e2fe17SYang Yingliang 		}
241661b7972Sstephen hemminger 
242661b7972Sstephen hemminger 		break;
243cb3ef7b0SHarshit Mogalapalli 	case LOST_IN_BURST_PERIOD:
244661b7972Sstephen hemminger 		if (rnd < clg->a3)
245a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
246661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
247a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
248661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
249cb3ef7b0SHarshit Mogalapalli 			clg->state = LOST_IN_BURST_PERIOD;
250661b7972Sstephen hemminger 			return true;
251661b7972Sstephen hemminger 		}
252661b7972Sstephen hemminger 		break;
253cb3ef7b0SHarshit Mogalapalli 	case LOST_IN_GAP_PERIOD:
254a6e2fe17SYang Yingliang 		clg->state = TX_IN_GAP_PERIOD;
255661b7972Sstephen hemminger 		break;
256661b7972Sstephen hemminger 	}
257661b7972Sstephen hemminger 
258661b7972Sstephen hemminger 	return false;
259661b7972Sstephen hemminger }
260661b7972Sstephen hemminger 
261661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
262661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
263661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
264661b7972Sstephen hemminger  *
26525985edcSLucas De Marchi  * Compares a random number with the transition probabilities out
266661b7972Sstephen hemminger  * of the current state, then decides the next state. A second
26725985edcSLucas De Marchi  * random number is extracted and compared with the loss probability
268661b7972Sstephen hemminger  * of the current state to decide whether the next packet will be
269661b7972Sstephen hemminger  * transmitted or lost.
270661b7972Sstephen hemminger  */
271661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
272661b7972Sstephen hemminger {
273661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
274661b7972Sstephen hemminger 
275661b7972Sstephen hemminger 	switch (clg->state) {
276c045a734SYang Yingliang 	case GOOD_STATE:
277a251c17aSJason A. Donenfeld 		if (get_random_u32() < clg->a1)
278c045a734SYang Yingliang 			clg->state = BAD_STATE;
279a251c17aSJason A. Donenfeld 		if (get_random_u32() < clg->a4)
280661b7972Sstephen hemminger 			return true;
2817c2781faSstephen hemminger 		break;
282c045a734SYang Yingliang 	case BAD_STATE:
283a251c17aSJason A. Donenfeld 		if (get_random_u32() < clg->a2)
284c045a734SYang Yingliang 			clg->state = GOOD_STATE;
285a251c17aSJason A. Donenfeld 		if (get_random_u32() > clg->a3)
286661b7972Sstephen hemminger 			return true;
287661b7972Sstephen hemminger 	}
288661b7972Sstephen hemminger 
289661b7972Sstephen hemminger 	return false;
290661b7972Sstephen hemminger }
291661b7972Sstephen hemminger 
292661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
293661b7972Sstephen hemminger {
294661b7972Sstephen hemminger 	switch (q->loss_model) {
295661b7972Sstephen hemminger 	case CLG_RANDOM:
296661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
297661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
298661b7972Sstephen hemminger 
299661b7972Sstephen hemminger 	case CLG_4_STATES:
300661b7972Sstephen hemminger 		/* 4-state loss model algorithm (also used for the GI model).
301661b7972Sstephen hemminger 		 * Extracts a value from the Markov 4-state loss generator;
302661b7972Sstephen hemminger 		 * if it indicates a loss, a packet is dropped and, if needed,
303661b7972Sstephen hemminger 		 * the event is written to the kernel logs.
304661b7972Sstephen hemminger 		 */
305661b7972Sstephen hemminger 		return loss_4state(q);
306661b7972Sstephen hemminger 
307661b7972Sstephen hemminger 	case CLG_GILB_ELL:
308661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm.
309661b7972Sstephen hemminger 		 * Extracts a value from the Gilbert-Elliot loss generator;
310661b7972Sstephen hemminger 		 * if it indicates a loss, a packet is dropped and, if needed,
311661b7972Sstephen hemminger 		 * the event is written to the kernel logs.
312661b7972Sstephen hemminger 		 */
313661b7972Sstephen hemminger 		return loss_gilb_ell(q);
314661b7972Sstephen hemminger 	}
315661b7972Sstephen hemminger 
316661b7972Sstephen hemminger 	return false;	/* not reached */
317661b7972Sstephen hemminger }
318661b7972Sstephen hemminger 
319661b7972Sstephen hemminger 
3201da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
3211da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
3221da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
3231da177e4SLinus Torvalds  */
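/* A sketch of the arithmetic (illustrative values, assuming the uapi
 * NETEM_DIST_SCALE of 8192): a table entry t is a sample measured in
 * units of sigma/8192, so t == 8192 adds one sigma and t == -4096
 * subtracts half a sigma; the result is mu + t * sigma / 8192, rounded.
 * With no table (dist == NULL) the value is uniform in
 * [mu - sigma, mu + sigma).
 */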
3249b0ed891SStephen Hemminger static s64 tabledist(s64 mu, s32 sigma,
325b407621cSStephen Hemminger 		     struct crndstate *state,
326b407621cSStephen Hemminger 		     const struct disttable *dist)
3271da177e4SLinus Torvalds {
328112f9cb6SDave Taht 	s64 x;
329b407621cSStephen Hemminger 	long t;
330b407621cSStephen Hemminger 	u32 rnd;
3311da177e4SLinus Torvalds 
3321da177e4SLinus Torvalds 	if (sigma == 0)
3331da177e4SLinus Torvalds 		return mu;
3341da177e4SLinus Torvalds 
3351da177e4SLinus Torvalds 	rnd = get_crandom(state);
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	/* default uniform distribution */
3381da177e4SLinus Torvalds 	if (dist == NULL)
339eadd1befSAleksandr Nogikh 		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
3401da177e4SLinus Torvalds 
3411da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
3421da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
3431da177e4SLinus Torvalds 	if (x >= 0)
3441da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
3451da177e4SLinus Torvalds 	else
3461da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
3471da177e4SLinus Torvalds 
3481da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3491da177e4SLinus Torvalds }
3501da177e4SLinus Torvalds 
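/* packet_time_ns - transmission time of @len bytes at the configured rate.
 * Illustrative numbers (added here, not from the original source): with
 * q->rate == 125000 bytes/s (1 Mbit/s) a 1250 byte packet takes 10 ms.
 * packet_overhead is added to len first; if cell_size is set (e.g.
 * ATM-style cells), len is then rounded up to a whole number of cells,
 * each padded by cell_overhead, before the division.
 */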
351bce552fdSStephen Hemminger static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
3527bc0f28cSHagen Paul Pfeifer {
35390b41a1cSHagen Paul Pfeifer 	len += q->packet_overhead;
35490b41a1cSHagen Paul Pfeifer 
35590b41a1cSHagen Paul Pfeifer 	if (q->cell_size) {
35690b41a1cSHagen Paul Pfeifer 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
35790b41a1cSHagen Paul Pfeifer 
35890b41a1cSHagen Paul Pfeifer 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
35990b41a1cSHagen Paul Pfeifer 			cells++;
36090b41a1cSHagen Paul Pfeifer 		len = cells * (q->cell_size + q->cell_overhead);
36190b41a1cSHagen Paul Pfeifer 	}
362bce552fdSStephen Hemminger 
363bce552fdSStephen Hemminger 	return div64_u64(len * NSEC_PER_SEC, q->rate);
3647bc0f28cSHagen Paul Pfeifer }
3657bc0f28cSHagen Paul Pfeifer 
366ff704050Sstephen hemminger static void tfifo_reset(struct Qdisc *sch)
367ff704050Sstephen hemminger {
368ff704050Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3693aa605f2SEric Dumazet 	struct rb_node *p = rb_first(&q->t_root);
370ff704050Sstephen hemminger 
3713aa605f2SEric Dumazet 	while (p) {
37218a4c0eaSEric Dumazet 		struct sk_buff *skb = rb_to_skb(p);
373ff704050Sstephen hemminger 
3743aa605f2SEric Dumazet 		p = rb_next(p);
3753aa605f2SEric Dumazet 		rb_erase(&skb->rbnode, &q->t_root);
3762f08a9a1SEric Dumazet 		rtnl_kfree_skbs(skb, skb);
377ff704050Sstephen hemminger 	}
378d66280b1SPeter Oskolkov 
379d66280b1SPeter Oskolkov 	rtnl_kfree_skbs(q->t_head, q->t_tail);
380d66280b1SPeter Oskolkov 	q->t_head = NULL;
381d66280b1SPeter Oskolkov 	q->t_tail = NULL;
382ff704050Sstephen hemminger }
383ff704050Sstephen hemminger 
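/* Enqueue into the time-sorted fifo.  A short summary added for clarity:
 * packets whose send time is not earlier than the current tail go onto
 * the t_head/t_tail linear list in O(1) (the common case when jitter is
 * low), while packets that must be delivered out of order are inserted
 * into the rbtree keyed by time_to_send.  netem_peek() later returns
 * whichever of the two structures holds the earlier packet.
 */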
384960fb66eSEric Dumazet static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
38550612537SEric Dumazet {
386aec0a40aSEric Dumazet 	struct netem_sched_data *q = qdisc_priv(sch);
387112f9cb6SDave Taht 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
388d66280b1SPeter Oskolkov 
389d66280b1SPeter Oskolkov 	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
390d66280b1SPeter Oskolkov 		if (q->t_tail)
391d66280b1SPeter Oskolkov 			q->t_tail->next = nskb;
392d66280b1SPeter Oskolkov 		else
393d66280b1SPeter Oskolkov 			q->t_head = nskb;
394d66280b1SPeter Oskolkov 		q->t_tail = nskb;
395d66280b1SPeter Oskolkov 	} else {
396aec0a40aSEric Dumazet 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
39750612537SEric Dumazet 
398aec0a40aSEric Dumazet 		while (*p) {
399aec0a40aSEric Dumazet 			struct sk_buff *skb;
40050612537SEric Dumazet 
401aec0a40aSEric Dumazet 			parent = *p;
40218a4c0eaSEric Dumazet 			skb = rb_to_skb(parent);
40350612537SEric Dumazet 			if (tnext >= netem_skb_cb(skb)->time_to_send)
404aec0a40aSEric Dumazet 				p = &parent->rb_right;
405aec0a40aSEric Dumazet 			else
406aec0a40aSEric Dumazet 				p = &parent->rb_left;
40750612537SEric Dumazet 		}
40856b17425SEric Dumazet 		rb_link_node(&nskb->rbnode, parent, p);
40956b17425SEric Dumazet 		rb_insert_color(&nskb->rbnode, &q->t_root);
410d66280b1SPeter Oskolkov 	}
411aec0a40aSEric Dumazet 	sch->q.qlen++;
41250612537SEric Dumazet }
41350612537SEric Dumazet 
4146071bd1aSNeil Horman /* netem can't properly corrupt a megapacket (like we get from GSO), so
4156071bd1aSNeil Horman  * when we statistically choose to corrupt one, we segment it instead, return
4166071bd1aSNeil Horman  * the first packet to be corrupted, and re-enqueue the remaining frames.
4176071bd1aSNeil Horman  */
418520ac30fSEric Dumazet static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
419520ac30fSEric Dumazet 				     struct sk_buff **to_free)
4206071bd1aSNeil Horman {
4216071bd1aSNeil Horman 	struct sk_buff *segs;
4226071bd1aSNeil Horman 	netdev_features_t features = netif_skb_features(skb);
4236071bd1aSNeil Horman 
4246071bd1aSNeil Horman 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
4256071bd1aSNeil Horman 
4266071bd1aSNeil Horman 	if (IS_ERR_OR_NULL(segs)) {
427520ac30fSEric Dumazet 		qdisc_drop(skb, sch, to_free);
4286071bd1aSNeil Horman 		return NULL;
4296071bd1aSNeil Horman 	}
4306071bd1aSNeil Horman 	consume_skb(skb);
4316071bd1aSNeil Horman 	return segs;
4326071bd1aSNeil Horman }
4336071bd1aSNeil Horman 
4340afb51e7SStephen Hemminger /*
4350afb51e7SStephen Hemminger  * Insert one skb into qdisc.
4360afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
4370afb51e7SStephen Hemminger  * 	NET_XMIT_DROP: queue length didn't change.
4380afb51e7SStephen Hemminger  *      NET_XMIT_SUCCESS: one skb was queued.
4390afb51e7SStephen Hemminger  */
440520ac30fSEric Dumazet static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
441520ac30fSEric Dumazet 			 struct sk_buff **to_free)
4421da177e4SLinus Torvalds {
4431da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
44489e1df74SGuillaume Chazarain 	/* We don't fill cb now as skb_unshare() may invalidate it */
44589e1df74SGuillaume Chazarain 	struct netem_skb_cb *cb;
4460afb51e7SStephen Hemminger 	struct sk_buff *skb2;
4476071bd1aSNeil Horman 	struct sk_buff *segs = NULL;
448177b8007SJakub Kicinski 	unsigned int prev_len = qdisc_pkt_len(skb);
4490afb51e7SStephen Hemminger 	int count = 1;
4506071bd1aSNeil Horman 	int rc = NET_XMIT_SUCCESS;
4515845f706SSheng Lan 	int rc_drop = NET_XMIT_DROP;
4521da177e4SLinus Torvalds 
4539410d386SChristoph Paasch 	/* Do not fool qdisc_drop_all() */
4549410d386SChristoph Paasch 	skb->prev = NULL;
4559410d386SChristoph Paasch 
4560afb51e7SStephen Hemminger 	/* Random duplication */
4570afb51e7SStephen Hemminger 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
4580afb51e7SStephen Hemminger 		++count;
4590afb51e7SStephen Hemminger 
460661b7972Sstephen hemminger 	/* Drop packet? */
461e4ae004bSEric Dumazet 	if (loss_event(q)) {
462e4ae004bSEric Dumazet 		if (q->ecn && INET_ECN_set_ce(skb))
46325331d6cSJohn Fastabend 			qdisc_qstats_drop(sch); /* mark packet */
464e4ae004bSEric Dumazet 		else
4650afb51e7SStephen Hemminger 			--count;
466e4ae004bSEric Dumazet 	}
4670afb51e7SStephen Hemminger 	if (count == 0) {
46825331d6cSJohn Fastabend 		qdisc_qstats_drop(sch);
469520ac30fSEric Dumazet 		__qdisc_drop(skb, to_free);
470c27f339aSJarek Poplawski 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
4711da177e4SLinus Torvalds 	}
4721da177e4SLinus Torvalds 
4735a308f40SEric Dumazet 	/* If a delay is expected, orphan the skb. (orphaning usually takes
4745a308f40SEric Dumazet 	 * place at TX completion time, so _before_ the link transit delay)
4755a308f40SEric Dumazet 	 */
4765080f39eSNik Unger 	if (q->latency || q->jitter || q->rate)
477f2f872f9SEric Dumazet 		skb_orphan_partial(skb);
4784e8a5201SDavid S. Miller 
4790afb51e7SStephen Hemminger 	/*
4800afb51e7SStephen Hemminger 	 * If we need to duplicate the packet, then re-insert at the top of
4810afb51e7SStephen Hemminger 	 * the qdisc tree, since the parent queuer expects that only one
4820afb51e7SStephen Hemminger 	 * skb will be queued.
483d5d75cd6SStephen Hemminger 	 */
4840afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
485159d2c7dSEric Dumazet 		struct Qdisc *rootq = qdisc_root_bh(sch);
4860afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
487d5d75cd6SStephen Hemminger 
488b396cca6SEric Dumazet 		q->duplicate = 0;
489520ac30fSEric Dumazet 		rootq->enqueue(skb2, rootq, to_free);
4900afb51e7SStephen Hemminger 		q->duplicate = dupsave;
4915845f706SSheng Lan 		rc_drop = NET_XMIT_SUCCESS;
4921da177e4SLinus Torvalds 	}
4931da177e4SLinus Torvalds 
494c865e5d9SStephen Hemminger 	/*
495c865e5d9SStephen Hemminger 	 * Randomized packet corruption.
496c865e5d9SStephen Hemminger 	 * Make a copy if needed since we are modifying the data.
497c865e5d9SStephen Hemminger 	 * If the packet is going to be hardware checksummed, then
498c865e5d9SStephen Hemminger 	 * do the checksum now in software before we mangle it.
499c865e5d9SStephen Hemminger 	 */
500c865e5d9SStephen Hemminger 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
5016071bd1aSNeil Horman 		if (skb_is_gso(skb)) {
5023e14c383SJakub Kicinski 			skb = netem_segment(skb, sch, to_free);
5033e14c383SJakub Kicinski 			if (!skb)
5045845f706SSheng Lan 				return rc_drop;
5053e14c383SJakub Kicinski 			segs = skb->next;
5063e14c383SJakub Kicinski 			skb_mark_not_on_list(skb);
5073e14c383SJakub Kicinski 			qdisc_skb_cb(skb)->pkt_len = skb->len;
5086071bd1aSNeil Horman 		}
5096071bd1aSNeil Horman 
5108a6e9c67SEric Dumazet 		skb = skb_unshare(skb, GFP_ATOMIC);
5118a6e9c67SEric Dumazet 		if (unlikely(!skb)) {
5128a6e9c67SEric Dumazet 			qdisc_qstats_drop(sch);
5138a6e9c67SEric Dumazet 			goto finish_segs;
5148a6e9c67SEric Dumazet 		}
5158a6e9c67SEric Dumazet 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
5168a6e9c67SEric Dumazet 		    skb_checksum_help(skb)) {
5178a6e9c67SEric Dumazet 			qdisc_drop(skb, sch, to_free);
518a7fa12d1SJakub Kicinski 			skb = NULL;
5196071bd1aSNeil Horman 			goto finish_segs;
5206071bd1aSNeil Horman 		}
521c865e5d9SStephen Hemminger 
5228032bf12SJason A. Donenfeld 		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
5238032bf12SJason A. Donenfeld 			1<<get_random_u32_below(8);
524c865e5d9SStephen Hemminger 	}
525c865e5d9SStephen Hemminger 
5265845f706SSheng Lan 	if (unlikely(sch->q.qlen >= sch->limit)) {
5273e14c383SJakub Kicinski 		/* re-link segs, so that qdisc_drop_all() frees them all */
5283e14c383SJakub Kicinski 		skb->next = segs;
5295845f706SSheng Lan 		qdisc_drop_all(skb, sch, to_free);
5305845f706SSheng Lan 		return rc_drop;
5315845f706SSheng Lan 	}
532960fb66eSEric Dumazet 
53325331d6cSJohn Fastabend 	qdisc_qstats_backlog_inc(sch, skb);
534960fb66eSEric Dumazet 
5355f86173bSJussi Kivilinna 	cb = netem_skb_cb(skb);
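	/* Reordering sketch (illustrative parameters, not from the source):
	 * with gap == 5 and reorder == 25%, the first four packets of each
	 * group always take the delayed path below; from the fifth onward
	 * each packet is sent immediately with 25% probability, and the
	 * counter resets once that happens, which reorders it ahead of the
	 * still-delayed packets.
	 */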
536f64f9e71SJoe Perches 	if (q->gap == 0 ||		/* not doing reordering */
537a42b4799SVijay Subramanian 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
538f64f9e71SJoe Perches 	    q->reorder < get_crandom(&q->reorder_cor)) {
539112f9cb6SDave Taht 		u64 now;
540112f9cb6SDave Taht 		s64 delay;
54107aaa115SStephen Hemminger 
54207aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
54307aaa115SStephen Hemminger 				  &q->delay_cor, q->delay_dist);
54407aaa115SStephen Hemminger 
545112f9cb6SDave Taht 		now = ktime_get_ns();
5467bc0f28cSHagen Paul Pfeifer 
5477bc0f28cSHagen Paul Pfeifer 		if (q->rate) {
5485080f39eSNik Unger 			struct netem_skb_cb *last = NULL;
5497bc0f28cSHagen Paul Pfeifer 
5505080f39eSNik Unger 			if (sch->q.tail)
5515080f39eSNik Unger 				last = netem_skb_cb(sch->q.tail);
5525080f39eSNik Unger 			if (q->t_root.rb_node) {
5535080f39eSNik Unger 				struct sk_buff *t_skb;
5545080f39eSNik Unger 				struct netem_skb_cb *t_last;
5555080f39eSNik Unger 
55618a4c0eaSEric Dumazet 				t_skb = skb_rb_last(&q->t_root);
5575080f39eSNik Unger 				t_last = netem_skb_cb(t_skb);
5585080f39eSNik Unger 				if (!last ||
559d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
5605080f39eSNik Unger 					last = t_last;
5615080f39eSNik Unger 			}
562d66280b1SPeter Oskolkov 			if (q->t_tail) {
563d66280b1SPeter Oskolkov 				struct netem_skb_cb *t_last =
564d66280b1SPeter Oskolkov 					netem_skb_cb(q->t_tail);
565d66280b1SPeter Oskolkov 
566d66280b1SPeter Oskolkov 				if (!last ||
567d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
568d66280b1SPeter Oskolkov 					last = t_last;
5695080f39eSNik Unger 			}
5705080f39eSNik Unger 
571aec0a40aSEric Dumazet 			if (last) {
5727bc0f28cSHagen Paul Pfeifer 				/*
573a13d3104SJohannes Naab 				 * The last packet in the queue is the reference
574a13d3104SJohannes Naab 				 * point (now); calculate its remaining time and
5757bc0f28cSHagen Paul Pfeifer 				 * subtract that credit from the delay.
5767bc0f28cSHagen Paul Pfeifer 				 */
5775080f39eSNik Unger 				delay -= last->time_to_send - now;
578112f9cb6SDave Taht 				delay = max_t(s64, 0, delay);
5795080f39eSNik Unger 				now = last->time_to_send;
5807bc0f28cSHagen Paul Pfeifer 			}
581a13d3104SJohannes Naab 
582bce552fdSStephen Hemminger 			delay += packet_time_ns(qdisc_pkt_len(skb), q);
5837bc0f28cSHagen Paul Pfeifer 		}
5847bc0f28cSHagen Paul Pfeifer 
5857c59e25fSPatrick McHardy 		cb->time_to_send = now + delay;
5861da177e4SLinus Torvalds 		++q->counter;
587960fb66eSEric Dumazet 		tfifo_enqueue(skb, sch);
5881da177e4SLinus Torvalds 	} else {
5890dca51d3SStephen Hemminger 		/*
5900dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
5910dca51d3SStephen Hemminger 		 * of the queue.
5920dca51d3SStephen Hemminger 		 */
593112f9cb6SDave Taht 		cb->time_to_send = ktime_get_ns();
5940dca51d3SStephen Hemminger 		q->counter = 0;
5958ba25dadSJarek Poplawski 
59659697730SDavid S. Miller 		__qdisc_enqueue_head(skb, &sch->q);
597eb101924SHagen Paul Pfeifer 		sch->qstats.requeues++;
598378a2f09SJarek Poplawski 	}
5991da177e4SLinus Torvalds 
6006071bd1aSNeil Horman finish_segs:
6016071bd1aSNeil Horman 	if (segs) {
602177b8007SJakub Kicinski 		unsigned int len, last_len;
603a7fa12d1SJakub Kicinski 		int nb;
604177b8007SJakub Kicinski 
605a7fa12d1SJakub Kicinski 		len = skb ? skb->len : 0;
606a7fa12d1SJakub Kicinski 		nb = skb ? 1 : 0;
607177b8007SJakub Kicinski 
6086071bd1aSNeil Horman 		while (segs) {
6096071bd1aSNeil Horman 			skb2 = segs->next;
610a8305bffSDavid S. Miller 			skb_mark_not_on_list(segs);
6116071bd1aSNeil Horman 			qdisc_skb_cb(segs)->pkt_len = segs->len;
6126071bd1aSNeil Horman 			last_len = segs->len;
613520ac30fSEric Dumazet 			rc = qdisc_enqueue(segs, sch, to_free);
6146071bd1aSNeil Horman 			if (rc != NET_XMIT_SUCCESS) {
6156071bd1aSNeil Horman 				if (net_xmit_drop_count(rc))
6166071bd1aSNeil Horman 					qdisc_qstats_drop(sch);
6176071bd1aSNeil Horman 			} else {
6186071bd1aSNeil Horman 				nb++;
6196071bd1aSNeil Horman 				len += last_len;
6206071bd1aSNeil Horman 			}
6216071bd1aSNeil Horman 			segs = skb2;
6226071bd1aSNeil Horman 		}
623a7fa12d1SJakub Kicinski 		/* Parent qdiscs accounted for 1 skb of size @prev_len */
624a7fa12d1SJakub Kicinski 		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
625e0ad032eSJakub Kicinski 	} else if (!skb) {
626e0ad032eSJakub Kicinski 		return NET_XMIT_DROP;
6276071bd1aSNeil Horman 	}
62810f6dfcfSstephen hemminger 	return NET_XMIT_SUCCESS;
6291da177e4SLinus Torvalds }
6301da177e4SLinus Torvalds 
631836af83bSDave Taht /* Delay the next round by picking a new future slot with a fresh
632836af83bSDave Taht  * budget of bytes and packets.
633836af83bSDave Taht  */
634836af83bSDave Taht 
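/* A sketch of the slot timing (illustrative values): without a slot
 * distribution table, next_delay is drawn uniformly from
 * [min_delay, max_delay), e.g. min_delay 1ms and max_delay 3ms schedules
 * the next slot 1-3 ms after @now; with a table, tabledist() shapes the
 * delay around dist_delay +/- dist_jitter instead.
 */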
635836af83bSDave Taht static void get_slot_next(struct netem_sched_data *q, u64 now)
636836af83bSDave Taht {
6370a9fe5c3SYousuk Seung 	s64 next_delay;
6380a9fe5c3SYousuk Seung 
6390a9fe5c3SYousuk Seung 	if (!q->slot_dist)
6400a9fe5c3SYousuk Seung 		next_delay = q->slot_config.min_delay +
641a251c17aSJason A. Donenfeld 				(get_random_u32() *
642836af83bSDave Taht 				 (q->slot_config.max_delay -
643836af83bSDave Taht 				  q->slot_config.min_delay) >> 32);
6440a9fe5c3SYousuk Seung 	else
6450a9fe5c3SYousuk Seung 		next_delay = tabledist(q->slot_config.dist_delay,
6460a9fe5c3SYousuk Seung 				       (s32)(q->slot_config.dist_jitter),
6470a9fe5c3SYousuk Seung 				       NULL, q->slot_dist);
6480a9fe5c3SYousuk Seung 
6490a9fe5c3SYousuk Seung 	q->slot.slot_next = now + next_delay;
650836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
651836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
652836af83bSDave Taht }
653836af83bSDave Taht 
654d66280b1SPeter Oskolkov static struct sk_buff *netem_peek(struct netem_sched_data *q)
655d66280b1SPeter Oskolkov {
656d66280b1SPeter Oskolkov 	struct sk_buff *skb = skb_rb_first(&q->t_root);
657d66280b1SPeter Oskolkov 	u64 t1, t2;
658d66280b1SPeter Oskolkov 
659d66280b1SPeter Oskolkov 	if (!skb)
660d66280b1SPeter Oskolkov 		return q->t_head;
661d66280b1SPeter Oskolkov 	if (!q->t_head)
662d66280b1SPeter Oskolkov 		return skb;
663d66280b1SPeter Oskolkov 
664d66280b1SPeter Oskolkov 	t1 = netem_skb_cb(skb)->time_to_send;
665d66280b1SPeter Oskolkov 	t2 = netem_skb_cb(q->t_head)->time_to_send;
666d66280b1SPeter Oskolkov 	if (t1 < t2)
667d66280b1SPeter Oskolkov 		return skb;
668d66280b1SPeter Oskolkov 	return q->t_head;
669d66280b1SPeter Oskolkov }
670d66280b1SPeter Oskolkov 
671d66280b1SPeter Oskolkov static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
672d66280b1SPeter Oskolkov {
673d66280b1SPeter Oskolkov 	if (skb == q->t_head) {
674d66280b1SPeter Oskolkov 		q->t_head = skb->next;
675d66280b1SPeter Oskolkov 		if (!q->t_head)
676d66280b1SPeter Oskolkov 			q->t_tail = NULL;
677d66280b1SPeter Oskolkov 	} else {
678d66280b1SPeter Oskolkov 		rb_erase(&skb->rbnode, &q->t_root);
679d66280b1SPeter Oskolkov 	}
680d66280b1SPeter Oskolkov }
681d66280b1SPeter Oskolkov 
6821da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
6831da177e4SLinus Torvalds {
6841da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6851da177e4SLinus Torvalds 	struct sk_buff *skb;
6861da177e4SLinus Torvalds 
68750612537SEric Dumazet tfifo_dequeue:
688ed760cb8SFlorian Westphal 	skb = __qdisc_dequeue_head(&sch->q);
689771018e7SStephen Hemminger 	if (skb) {
69025331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
6910ad2a836SBeshay, Joseph deliver:
692aec0a40aSEric Dumazet 		qdisc_bstats_update(sch, skb);
693aec0a40aSEric Dumazet 		return skb;
694aec0a40aSEric Dumazet 	}
695d66280b1SPeter Oskolkov 	skb = netem_peek(q);
696d66280b1SPeter Oskolkov 	if (skb) {
697112f9cb6SDave Taht 		u64 time_to_send;
698836af83bSDave Taht 		u64 now = ktime_get_ns();
69936b7bfe0SEric Dumazet 
7000f9f32acSStephen Hemminger 		/* is there more time remaining before it may be sent? */
70136b7bfe0SEric Dumazet 		time_to_send = netem_skb_cb(skb)->time_to_send;
702836af83bSDave Taht 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
703836af83bSDave Taht 			get_slot_next(q, now);
704aec0a40aSEric Dumazet 
705836af83bSDave Taht 		if (time_to_send <= now && q->slot.slot_next <= now) {
706d66280b1SPeter Oskolkov 			netem_erase_head(q, skb);
707aec0a40aSEric Dumazet 			sch->q.qlen--;
7080ad2a836SBeshay, Joseph 			qdisc_qstats_backlog_dec(sch, skb);
709aec0a40aSEric Dumazet 			skb->next = NULL;
710aec0a40aSEric Dumazet 			skb->prev = NULL;
711bffa72cfSEric Dumazet 			/* skb->dev shares skb->rbnode area,
712bffa72cfSEric Dumazet 			 * we need to restore its value.
713bffa72cfSEric Dumazet 			 */
714bffa72cfSEric Dumazet 			skb->dev = qdisc_dev(sch);
71503c05f0dSJarek Poplawski 
716836af83bSDave Taht 			if (q->slot.slot_next) {
717836af83bSDave Taht 				q->slot.packets_left--;
718836af83bSDave Taht 				q->slot.bytes_left -= qdisc_pkt_len(skb);
719836af83bSDave Taht 				if (q->slot.packets_left <= 0 ||
720836af83bSDave Taht 				    q->slot.bytes_left <= 0)
721836af83bSDave Taht 					get_slot_next(q, now);
722836af83bSDave Taht 			}
723836af83bSDave Taht 
72450612537SEric Dumazet 			if (q->qdisc) {
72521de12eeSEric Dumazet 				unsigned int pkt_len = qdisc_pkt_len(skb);
726520ac30fSEric Dumazet 				struct sk_buff *to_free = NULL;
727520ac30fSEric Dumazet 				int err;
72850612537SEric Dumazet 
729520ac30fSEric Dumazet 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
730520ac30fSEric Dumazet 				kfree_skb_list(to_free);
73121de12eeSEric Dumazet 				if (err != NET_XMIT_SUCCESS &&
73221de12eeSEric Dumazet 				    net_xmit_drop_count(err)) {
73325331d6cSJohn Fastabend 					qdisc_qstats_drop(sch);
7342ccccf5fSWANG Cong 					qdisc_tree_reduce_backlog(sch, 1,
73521de12eeSEric Dumazet 								  pkt_len);
73650612537SEric Dumazet 				}
73750612537SEric Dumazet 				goto tfifo_dequeue;
73850612537SEric Dumazet 			}
739aec0a40aSEric Dumazet 			goto deliver;
74011274e5aSStephen Hemminger 		}
74107aaa115SStephen Hemminger 
74250612537SEric Dumazet 		if (q->qdisc) {
74350612537SEric Dumazet 			skb = q->qdisc->ops->dequeue(q->qdisc);
74450612537SEric Dumazet 			if (skb)
74550612537SEric Dumazet 				goto deliver;
74650612537SEric Dumazet 		}
747836af83bSDave Taht 
748836af83bSDave Taht 		qdisc_watchdog_schedule_ns(&q->watchdog,
749836af83bSDave Taht 					   max(time_to_send,
750836af83bSDave Taht 					       q->slot.slot_next));
7510f9f32acSStephen Hemminger 	}
7520f9f32acSStephen Hemminger 
75350612537SEric Dumazet 	if (q->qdisc) {
75450612537SEric Dumazet 		skb = q->qdisc->ops->dequeue(q->qdisc);
75550612537SEric Dumazet 		if (skb)
75650612537SEric Dumazet 			goto deliver;
75750612537SEric Dumazet 	}
7580f9f32acSStephen Hemminger 	return NULL;
7591da177e4SLinus Torvalds }
7601da177e4SLinus Torvalds 
7611da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
7621da177e4SLinus Torvalds {
7631da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7641da177e4SLinus Torvalds 
76550612537SEric Dumazet 	qdisc_reset_queue(sch);
766ff704050Sstephen hemminger 	tfifo_reset(sch);
76750612537SEric Dumazet 	if (q->qdisc)
7681da177e4SLinus Torvalds 		qdisc_reset(q->qdisc);
76959cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
7701da177e4SLinus Torvalds }
7711da177e4SLinus Torvalds 
7726373a9a2Sstephen hemminger static void dist_free(struct disttable *d)
7736373a9a2Sstephen hemminger {
7744cb28970SWANG Cong 	kvfree(d);
7756373a9a2Sstephen hemminger }
7766373a9a2Sstephen hemminger 
7771da177e4SLinus Torvalds /*
7781da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
7791da177e4SLinus Torvalds  * signed 16 bit values.
7801da177e4SLinus Torvalds  */
781836af83bSDave Taht 
78211b73313SEric Dumazet static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
7831da177e4SLinus Torvalds {
7846373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
7851e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
7861da177e4SLinus Torvalds 	struct disttable *d;
7871da177e4SLinus Torvalds 	int i;
7881da177e4SLinus Torvalds 
789b41d936bSEric Dumazet 	if (!n || n > NETEM_DIST_MAX)
7901da177e4SLinus Torvalds 		return -EINVAL;
7911da177e4SLinus Torvalds 
79212929198SGustavo A. R. Silva 	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
7931da177e4SLinus Torvalds 	if (!d)
7941da177e4SLinus Torvalds 		return -ENOMEM;
7951da177e4SLinus Torvalds 
7961da177e4SLinus Torvalds 	d->size = n;
7971da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
7981da177e4SLinus Torvalds 		d->table[i] = data[i];
7991da177e4SLinus Torvalds 
80011b73313SEric Dumazet 	*tbl = d;
8011da177e4SLinus Torvalds 	return 0;
8021da177e4SLinus Torvalds }
8031da177e4SLinus Torvalds 
804836af83bSDave Taht static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
805836af83bSDave Taht {
806836af83bSDave Taht 	const struct tc_netem_slot *c = nla_data(attr);
807836af83bSDave Taht 
808836af83bSDave Taht 	q->slot_config = *c;
809836af83bSDave Taht 	if (q->slot_config.max_packets == 0)
810836af83bSDave Taht 		q->slot_config.max_packets = INT_MAX;
811836af83bSDave Taht 	if (q->slot_config.max_bytes == 0)
812836af83bSDave Taht 		q->slot_config.max_bytes = INT_MAX;
813eadd1befSAleksandr Nogikh 
814eadd1befSAleksandr Nogikh 	/* capping dist_jitter to the range acceptable by tabledist() */
815eadd1befSAleksandr Nogikh 	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
816eadd1befSAleksandr Nogikh 
817836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
818836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
8190a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
8200a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter)
821836af83bSDave Taht 		q->slot.slot_next = ktime_get_ns();
822836af83bSDave Taht 	else
823836af83bSDave Taht 		q->slot.slot_next = 0;
824836af83bSDave Taht }
825836af83bSDave Taht 
82649545a77SYang Yingliang static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
8271da177e4SLinus Torvalds {
8281e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
8291da177e4SLinus Torvalds 
8301da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
8311da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
8321da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
8331da177e4SLinus Torvalds }
8341da177e4SLinus Torvalds 
83549545a77SYang Yingliang static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
8360dca51d3SStephen Hemminger {
8371e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
8380dca51d3SStephen Hemminger 
8390dca51d3SStephen Hemminger 	q->reorder = r->probability;
8400dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
8410dca51d3SStephen Hemminger }
8420dca51d3SStephen Hemminger 
84349545a77SYang Yingliang static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
844c865e5d9SStephen Hemminger {
8451e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
846c865e5d9SStephen Hemminger 
847c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
848c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
849c865e5d9SStephen Hemminger }
850c865e5d9SStephen Hemminger 
85149545a77SYang Yingliang static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
8527bc0f28cSHagen Paul Pfeifer {
8537bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
8547bc0f28cSHagen Paul Pfeifer 
8557bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
85690b41a1cSHagen Paul Pfeifer 	q->packet_overhead = r->packet_overhead;
85790b41a1cSHagen Paul Pfeifer 	q->cell_size = r->cell_size;
858809fa972SHannes Frederic Sowa 	q->cell_overhead = r->cell_overhead;
85990b41a1cSHagen Paul Pfeifer 	if (q->cell_size)
86090b41a1cSHagen Paul Pfeifer 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
861809fa972SHannes Frederic Sowa 	else
862809fa972SHannes Frederic Sowa 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
8637bc0f28cSHagen Paul Pfeifer }
8647bc0f28cSHagen Paul Pfeifer 
86549545a77SYang Yingliang static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
866661b7972Sstephen hemminger {
867661b7972Sstephen hemminger 	const struct nlattr *la;
868661b7972Sstephen hemminger 	int rem;
869661b7972Sstephen hemminger 
870661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
871661b7972Sstephen hemminger 		u16 type = nla_type(la);
872661b7972Sstephen hemminger 
873661b7972Sstephen hemminger 		switch (type) {
874661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
875661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
876661b7972Sstephen hemminger 
8772494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
878661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
879661b7972Sstephen hemminger 				return -EINVAL;
880661b7972Sstephen hemminger 			}
881661b7972Sstephen hemminger 
882661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
883661b7972Sstephen hemminger 
8843fbac2a8SYang Yingliang 			q->clg.state = TX_IN_GAP_PERIOD;
885661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
886661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
887661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
888661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
889661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
890661b7972Sstephen hemminger 			break;
891661b7972Sstephen hemminger 		}
892661b7972Sstephen hemminger 
893661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
894661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
895661b7972Sstephen hemminger 
8962494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
8972494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
898661b7972Sstephen hemminger 				return -EINVAL;
899661b7972Sstephen hemminger 			}
900661b7972Sstephen hemminger 
901661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
9023fbac2a8SYang Yingliang 			q->clg.state = GOOD_STATE;
903661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
904661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
905661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
906661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
907661b7972Sstephen hemminger 			break;
908661b7972Sstephen hemminger 		}
909661b7972Sstephen hemminger 
910661b7972Sstephen hemminger 		default:
911661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
912661b7972Sstephen hemminger 			return -EINVAL;
913661b7972Sstephen hemminger 		}
914661b7972Sstephen hemminger 	}
915661b7972Sstephen hemminger 
916661b7972Sstephen hemminger 	return 0;
917661b7972Sstephen hemminger }
918661b7972Sstephen hemminger 
91927a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
92027a3421eSPatrick McHardy 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
92127a3421eSPatrick McHardy 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
92227a3421eSPatrick McHardy 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
9237bc0f28cSHagen Paul Pfeifer 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
924661b7972Sstephen hemminger 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
925e4ae004bSEric Dumazet 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
9266a031f67SYang Yingliang 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
92799803171SDave Taht 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
92899803171SDave Taht 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
929836af83bSDave Taht 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
930*4072d97dSFrançois Michel 	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
93127a3421eSPatrick McHardy };
93227a3421eSPatrick McHardy 
9332c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
9342c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
9352c10b32bSThomas Graf {
9362c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
9372c10b32bSThomas Graf 
938661b7972Sstephen hemminger 	if (nested_len < 0) {
939661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
9402c10b32bSThomas Graf 		return -EINVAL;
941661b7972Sstephen hemminger 	}
942661b7972Sstephen hemminger 
9432c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
9448cb08174SJohannes Berg 		return nla_parse_deprecated(tb, maxtype,
9458cb08174SJohannes Berg 					    nla_data(nla) + NLA_ALIGN(len),
946fceb6435SJohannes Berg 					    nested_len, policy, NULL);
947661b7972Sstephen hemminger 
9482c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
9492c10b32bSThomas Graf 	return 0;
9502c10b32bSThomas Graf }
9512c10b32bSThomas Graf 
952c865e5d9SStephen Hemminger /* Parse netlink message to set options */
9532030721cSAlexander Aring static int netem_change(struct Qdisc *sch, struct nlattr *opt,
9542030721cSAlexander Aring 			struct netlink_ext_ack *extack)
9551da177e4SLinus Torvalds {
9561da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
957b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
95811b73313SEric Dumazet 	struct disttable *delay_dist = NULL;
95911b73313SEric Dumazet 	struct disttable *slot_dist = NULL;
9601da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
96154a4b05cSYang Yingliang 	struct clgstate old_clg;
96254a4b05cSYang Yingliang 	int old_loss_model = CLG_RANDOM;
9631da177e4SLinus Torvalds 	int ret;
9641da177e4SLinus Torvalds 
9652c10b32bSThomas Graf 	qopt = nla_data(opt);
9662c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
967b03f4672SPatrick McHardy 	if (ret < 0)
968b03f4672SPatrick McHardy 		return ret;
969b03f4672SPatrick McHardy 
97011b73313SEric Dumazet 	if (tb[TCA_NETEM_DELAY_DIST]) {
97111b73313SEric Dumazet 		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
97211b73313SEric Dumazet 		if (ret)
97311b73313SEric Dumazet 			goto table_free;
97411b73313SEric Dumazet 	}
97511b73313SEric Dumazet 
97611b73313SEric Dumazet 	if (tb[TCA_NETEM_SLOT_DIST]) {
97711b73313SEric Dumazet 		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
97811b73313SEric Dumazet 		if (ret)
97911b73313SEric Dumazet 			goto table_free;
98011b73313SEric Dumazet 	}
98111b73313SEric Dumazet 
9822174a08dSEric Dumazet 	sch_tree_lock(sch);
98354a4b05cSYang Yingliang 	/* backup q->clg and q->loss_model */
98454a4b05cSYang Yingliang 	old_clg = q->clg;
98554a4b05cSYang Yingliang 	old_loss_model = q->loss_model;
98654a4b05cSYang Yingliang 
98754a4b05cSYang Yingliang 	if (tb[TCA_NETEM_LOSS]) {
98849545a77SYang Yingliang 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
98954a4b05cSYang Yingliang 		if (ret) {
99054a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
99111b73313SEric Dumazet 			q->clg = old_clg;
9922174a08dSEric Dumazet 			goto unlock;
99354a4b05cSYang Yingliang 		}
99454a4b05cSYang Yingliang 	} else {
99554a4b05cSYang Yingliang 		q->loss_model = CLG_RANDOM;
99654a4b05cSYang Yingliang 	}
99754a4b05cSYang Yingliang 
99811b73313SEric Dumazet 	if (delay_dist)
99911b73313SEric Dumazet 		swap(q->delay_dist, delay_dist);
100011b73313SEric Dumazet 	if (slot_dist)
100111b73313SEric Dumazet 		swap(q->slot_dist, slot_dist);
100250612537SEric Dumazet 	sch->limit = qopt->limit;
10031da177e4SLinus Torvalds 
1004112f9cb6SDave Taht 	q->latency = PSCHED_TICKS2NS(qopt->latency);
1005112f9cb6SDave Taht 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
10061da177e4SLinus Torvalds 	q->limit = qopt->limit;
10071da177e4SLinus Torvalds 	q->gap = qopt->gap;
10080dca51d3SStephen Hemminger 	q->counter = 0;
10091da177e4SLinus Torvalds 	q->loss = qopt->loss;
10101da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
10111da177e4SLinus Torvalds 
1012bb2f8cc0SStephen Hemminger 	/* For compatibility with earlier versions:
1013bb2f8cc0SStephen Hemminger 	 * if gap is set, we need to assume 100% reorder probability.
10140dca51d3SStephen Hemminger 	 */
1015a362e0a7SStephen Hemminger 	if (q->gap)
10160dca51d3SStephen Hemminger 		q->reorder = ~0;
10170dca51d3SStephen Hemminger 
1018265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
101949545a77SYang Yingliang 		get_correlation(q, tb[TCA_NETEM_CORR]);
10201da177e4SLinus Torvalds 
1021265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
102249545a77SYang Yingliang 		get_reorder(q, tb[TCA_NETEM_REORDER]);
10231da177e4SLinus Torvalds 
1024265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
102549545a77SYang Yingliang 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
10261da177e4SLinus Torvalds 
10277bc0f28cSHagen Paul Pfeifer 	if (tb[TCA_NETEM_RATE])
102849545a77SYang Yingliang 		get_rate(q, tb[TCA_NETEM_RATE]);
10297bc0f28cSHagen Paul Pfeifer 
10306a031f67SYang Yingliang 	if (tb[TCA_NETEM_RATE64])
10316a031f67SYang Yingliang 		q->rate = max_t(u64, q->rate,
10326a031f67SYang Yingliang 				nla_get_u64(tb[TCA_NETEM_RATE64]));
10336a031f67SYang Yingliang 
103499803171SDave Taht 	if (tb[TCA_NETEM_LATENCY64])
103599803171SDave Taht 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
103699803171SDave Taht 
103799803171SDave Taht 	if (tb[TCA_NETEM_JITTER64])
103899803171SDave Taht 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
103999803171SDave Taht 
1040e4ae004bSEric Dumazet 	if (tb[TCA_NETEM_ECN])
1041e4ae004bSEric Dumazet 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1042e4ae004bSEric Dumazet 
1043836af83bSDave Taht 	if (tb[TCA_NETEM_SLOT])
1044836af83bSDave Taht 		get_slot(q, tb[TCA_NETEM_SLOT]);
1045836af83bSDave Taht 
1046eadd1befSAleksandr Nogikh 	/* capping jitter to the range acceptable by tabledist() */
1047eadd1befSAleksandr Nogikh 	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
1048eadd1befSAleksandr Nogikh 
1049*4072d97dSFrançois Michel 	if (tb[TCA_NETEM_PRNG_SEED])
1050*4072d97dSFrançois Michel 		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
1051*4072d97dSFrançois Michel 	else
1052*4072d97dSFrançois Michel 		q->prng.seed = get_random_u64();
1053*4072d97dSFrançois Michel 	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
1054*4072d97dSFrançois Michel 
10552174a08dSEric Dumazet unlock:
10562174a08dSEric Dumazet 	sch_tree_unlock(sch);
105711b73313SEric Dumazet 
105811b73313SEric Dumazet table_free:
105911b73313SEric Dumazet 	dist_free(delay_dist);
106011b73313SEric Dumazet 	dist_free(slot_dist);
1061661b7972Sstephen hemminger 	return ret;
10621da177e4SLinus Torvalds }
10631da177e4SLinus Torvalds 
1064e63d7dfdSAlexander Aring static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1065e63d7dfdSAlexander Aring 		      struct netlink_ext_ack *extack)
10661da177e4SLinus Torvalds {
10671da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10681da177e4SLinus Torvalds 	int ret;
10691da177e4SLinus Torvalds 
1070634576a1SNikolay Aleksandrov 	qdisc_watchdog_init(&q->watchdog, sch);
1071634576a1SNikolay Aleksandrov 
10721da177e4SLinus Torvalds 	if (!opt)
10731da177e4SLinus Torvalds 		return -EINVAL;
10741da177e4SLinus Torvalds 
1075661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
10762030721cSAlexander Aring 	ret = netem_change(sch, opt, extack);
107750612537SEric Dumazet 	if (ret)
1078250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
10791da177e4SLinus Torvalds 	return ret;
10801da177e4SLinus Torvalds }
10811da177e4SLinus Torvalds 
10821da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
10831da177e4SLinus Torvalds {
10841da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10851da177e4SLinus Torvalds 
108659cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
108750612537SEric Dumazet 	if (q->qdisc)
108886bd446bSVlad Buslov 		qdisc_put(q->qdisc);
10896373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
10900a9fe5c3SYousuk Seung 	dist_free(q->slot_dist);
10911da177e4SLinus Torvalds }
10921da177e4SLinus Torvalds 
1093661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
1094661b7972Sstephen hemminger 			   struct sk_buff *skb)
1095661b7972Sstephen hemminger {
1096661b7972Sstephen hemminger 	struct nlattr *nest;
1097661b7972Sstephen hemminger 
1098ae0be8deSMichal Kubecek 	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
1099661b7972Sstephen hemminger 	if (nest == NULL)
1100661b7972Sstephen hemminger 		goto nla_put_failure;
1101661b7972Sstephen hemminger 
1102661b7972Sstephen hemminger 	switch (q->loss_model) {
1103661b7972Sstephen hemminger 	case CLG_RANDOM:
1104661b7972Sstephen hemminger 		/* legacy loss model */
1105661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
1106661b7972Sstephen hemminger 		return 0;	/* no data */
1107661b7972Sstephen hemminger 
1108661b7972Sstephen hemminger 	case CLG_4_STATES: {
1109661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
1110661b7972Sstephen hemminger 			.p13 = q->clg.a1,
1111661b7972Sstephen hemminger 			.p31 = q->clg.a2,
1112661b7972Sstephen hemminger 			.p32 = q->clg.a3,
1113661b7972Sstephen hemminger 			.p14 = q->clg.a4,
1114661b7972Sstephen hemminger 			.p23 = q->clg.a5,
1115661b7972Sstephen hemminger 		};
1116661b7972Sstephen hemminger 
11171b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
11181b34ec43SDavid S. Miller 			goto nla_put_failure;
1119661b7972Sstephen hemminger 		break;
1120661b7972Sstephen hemminger 	}
1121661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
1122661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
1123661b7972Sstephen hemminger 			.p = q->clg.a1,
1124661b7972Sstephen hemminger 			.r = q->clg.a2,
1125661b7972Sstephen hemminger 			.h = q->clg.a3,
1126661b7972Sstephen hemminger 			.k1 = q->clg.a4,
1127661b7972Sstephen hemminger 		};
1128661b7972Sstephen hemminger 
11291b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
11301b34ec43SDavid S. Miller 			goto nla_put_failure;
1131661b7972Sstephen hemminger 		break;
1132661b7972Sstephen hemminger 	}
1133661b7972Sstephen hemminger 	}
1134661b7972Sstephen hemminger 
1135661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
1136661b7972Sstephen hemminger 	return 0;
1137661b7972Sstephen hemminger 
1138661b7972Sstephen hemminger nla_put_failure:
1139661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
1140661b7972Sstephen hemminger 	return -1;
1141661b7972Sstephen hemminger }
1142661b7972Sstephen hemminger 
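/* Serialize the current configuration back to user space. The legacy
 * tc_netem_qopt carries latency/jitter converted to ticks and clamped to
 * 32 bits; the exact 64-bit nanosecond values follow as separate
 * attributes.
 */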
11431da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
11441da177e4SLinus Torvalds {
11451da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
1146861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
11471da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
11481da177e4SLinus Torvalds 	struct tc_netem_corr cor;
11490dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
1150c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
11517bc0f28cSHagen Paul Pfeifer 	struct tc_netem_rate rate;
1152836af83bSDave Taht 	struct tc_netem_slot slot;
11531da177e4SLinus Torvalds 
1154a2b1a5d4SPeilin Ye 	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
1155112f9cb6SDave Taht 			     UINT_MAX);
1156a2b1a5d4SPeilin Ye 	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
1157112f9cb6SDave Taht 			    UINT_MAX);
11581da177e4SLinus Torvalds 	qopt.limit = q->limit;
11591da177e4SLinus Torvalds 	qopt.loss = q->loss;
11601da177e4SLinus Torvalds 	qopt.gap = q->gap;
11611da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
11621b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
11631b34ec43SDavid S. Miller 		goto nla_put_failure;
11641da177e4SLinus Torvalds 
116599803171SDave Taht 	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
116699803171SDave Taht 		goto nla_put_failure;
116799803171SDave Taht 
116899803171SDave Taht 	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
116999803171SDave Taht 		goto nla_put_failure;
117099803171SDave Taht 
11711da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
11721da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
11731da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
11741b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
11751b34ec43SDavid S. Miller 		goto nla_put_failure;
11760dca51d3SStephen Hemminger 
11770dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
11780dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
11791b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
11801b34ec43SDavid S. Miller 		goto nla_put_failure;
11810dca51d3SStephen Hemminger 
1182c865e5d9SStephen Hemminger 	corrupt.probability = q->corrupt;
1183c865e5d9SStephen Hemminger 	corrupt.correlation = q->corrupt_cor.rho;
11841b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
11851b34ec43SDavid S. Miller 		goto nla_put_failure;
1186c865e5d9SStephen Hemminger 
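	/* A rate that does not fit in the legacy 32-bit field is exported
	 * via TCA_NETEM_RATE64; the 32-bit field is then set to ~0U as an
	 * overflow marker.
	 */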
11876a031f67SYang Yingliang 	if (q->rate >= (1ULL << 32)) {
11882a51c1e8SNicolas Dichtel 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
11892a51c1e8SNicolas Dichtel 				      TCA_NETEM_PAD))
11906a031f67SYang Yingliang 			goto nla_put_failure;
11916a031f67SYang Yingliang 		rate.rate = ~0U;
11926a031f67SYang Yingliang 	} else {
11937bc0f28cSHagen Paul Pfeifer 		rate.rate = q->rate;
11946a031f67SYang Yingliang 	}
119590b41a1cSHagen Paul Pfeifer 	rate.packet_overhead = q->packet_overhead;
119690b41a1cSHagen Paul Pfeifer 	rate.cell_size = q->cell_size;
119790b41a1cSHagen Paul Pfeifer 	rate.cell_overhead = q->cell_overhead;
11981b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
11991b34ec43SDavid S. Miller 		goto nla_put_failure;
12007bc0f28cSHagen Paul Pfeifer 
1201e4ae004bSEric Dumazet 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1202e4ae004bSEric Dumazet 		goto nla_put_failure;
1203e4ae004bSEric Dumazet 
1204661b7972Sstephen hemminger 	if (dump_loss_model(q, skb) != 0)
1205661b7972Sstephen hemminger 		goto nla_put_failure;
1206661b7972Sstephen hemminger 
12070a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
12080a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter) {
1209836af83bSDave Taht 		slot = q->slot_config;
1210836af83bSDave Taht 		if (slot.max_packets == INT_MAX)
1211836af83bSDave Taht 			slot.max_packets = 0;
1212836af83bSDave Taht 		if (slot.max_bytes == INT_MAX)
1213836af83bSDave Taht 			slot.max_bytes = 0;
1214836af83bSDave Taht 		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1215836af83bSDave Taht 			goto nla_put_failure;
1216836af83bSDave Taht 	}
1217836af83bSDave Taht 
1218*4072d97dSFrançois Michel 	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
1219*4072d97dSFrançois Michel 			      TCA_NETEM_PAD))
1220*4072d97dSFrançois Michel 		goto nla_put_failure;
1221*4072d97dSFrançois Michel 
1222861d7f74Sstephen hemminger 	return nla_nest_end(skb, nla);
12231da177e4SLinus Torvalds 
12241e90474cSPatrick McHardy nla_put_failure:
1225861d7f74Sstephen hemminger 	nlmsg_trim(skb, nla);
12261da177e4SLinus Torvalds 	return -1;
12271da177e4SLinus Torvalds }
12281da177e4SLinus Torvalds 
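/* Class interface: netem exposes exactly one class (minor 1) that wraps
 * the optional child qdisc.
 */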
122910f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
123010f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
123110f6dfcfSstephen hemminger {
123210f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
123310f6dfcfSstephen hemminger 
123450612537SEric Dumazet 	if (cl != 1 || !q->qdisc) 	/* only one class */
123510f6dfcfSstephen hemminger 		return -ENOENT;
123610f6dfcfSstephen hemminger 
123710f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
123810f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
123910f6dfcfSstephen hemminger 
124010f6dfcfSstephen hemminger 	return 0;
124110f6dfcfSstephen hemminger }
124210f6dfcfSstephen hemminger 
124310f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1244653d6fd6SAlexander Aring 		     struct Qdisc **old, struct netlink_ext_ack *extack)
124510f6dfcfSstephen hemminger {
124610f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
124710f6dfcfSstephen hemminger 
124886a7996cSWANG Cong 	*old = qdisc_replace(sch, new, &q->qdisc);
124910f6dfcfSstephen hemminger 	return 0;
125010f6dfcfSstephen hemminger }
125110f6dfcfSstephen hemminger 
125210f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
125310f6dfcfSstephen hemminger {
125410f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
125510f6dfcfSstephen hemminger 	return q->qdisc;
125610f6dfcfSstephen hemminger }
125710f6dfcfSstephen hemminger 
1258143976ceSWANG Cong static unsigned long netem_find(struct Qdisc *sch, u32 classid)
125910f6dfcfSstephen hemminger {
126010f6dfcfSstephen hemminger 	return 1;
126110f6dfcfSstephen hemminger }
126210f6dfcfSstephen hemminger 
126310f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
126410f6dfcfSstephen hemminger {
126510f6dfcfSstephen hemminger 	if (!walker->stop) {
1266e046fa89SZhengchao Shao 		if (!tc_qdisc_stats_dump(sch, 1, walker))
126710f6dfcfSstephen hemminger 			return;
126810f6dfcfSstephen hemminger 	}
126910f6dfcfSstephen hemminger }
127010f6dfcfSstephen hemminger 
127110f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = {
127210f6dfcfSstephen hemminger 	.graft		=	netem_graft,
127310f6dfcfSstephen hemminger 	.leaf		=	netem_leaf,
1274143976ceSWANG Cong 	.find		=	netem_find,
127510f6dfcfSstephen hemminger 	.walk		=	netem_walk,
127610f6dfcfSstephen hemminger 	.dump		=	netem_dump_class,
127710f6dfcfSstephen hemminger };
127810f6dfcfSstephen hemminger 
127920fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
12801da177e4SLinus Torvalds 	.id		=	"netem",
128110f6dfcfSstephen hemminger 	.cl_ops		=	&netem_class_ops,
12821da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
12831da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
12841da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
128577be155cSJarek Poplawski 	.peek		=	qdisc_peek_dequeued,
12861da177e4SLinus Torvalds 	.init		=	netem_init,
12871da177e4SLinus Torvalds 	.reset		=	netem_reset,
12881da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
12891da177e4SLinus Torvalds 	.change		=	netem_change,
12901da177e4SLinus Torvalds 	.dump		=	netem_dump,
12911da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
12921da177e4SLinus Torvalds };
12931da177e4SLinus Torvalds 
12941da177e4SLinus Torvalds 
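/* Module entry points: print the version banner and (un)register the
 * qdisc ops with the traffic-control core.
 */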
12951da177e4SLinus Torvalds static int __init netem_module_init(void)
12961da177e4SLinus Torvalds {
1297eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
12981da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
12991da177e4SLinus Torvalds }
13001da177e4SLinus Torvalds static void __exit netem_module_exit(void)
13011da177e4SLinus Torvalds {
13021da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
13031da177e4SLinus Torvalds }
13041da177e4SLinus Torvalds module_init(netem_module_init)
13051da177e4SLinus Torvalds module_exit(netem_module_exit)
13061da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1307