xref: /openbmc/linux/net/sched/sch_netem.c (revision 8cb08174)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7798b6b19SStephen Hemminger  * 		2 of the License.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
16b7f080cfSAlexey Dobriyan #include <linux/mm.h>
171da177e4SLinus Torvalds #include <linux/module.h>
185a0e3ad6STejun Heo #include <linux/slab.h>
191da177e4SLinus Torvalds #include <linux/types.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/skbuff.h>
2378776d3fSDavid S. Miller #include <linux/vmalloc.h>
241da177e4SLinus Torvalds #include <linux/rtnetlink.h>
2590b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h>
26aec0a40aSEric Dumazet #include <linux/rbtree.h>
271da177e4SLinus Torvalds 
28dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
291da177e4SLinus Torvalds #include <net/pkt_sched.h>
30e4ae004bSEric Dumazet #include <net/inet_ecn.h>
311da177e4SLinus Torvalds 
32250a65f7Sstephen hemminger #define VERSION "1.3"
33eb229c4cSStephen Hemminger 
341da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
351da177e4SLinus Torvalds 	====================================
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
381da177e4SLinus Torvalds 		 Network Emulation Tool"
391da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
401da177e4SLinus Torvalds 
411da177e4SLinus Torvalds 	 ----------------------------------------------------------------
421da177e4SLinus Torvalds 
431da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
441da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
451da177e4SLinus Torvalds 	 of a full-blown network emulator like NISTnet. It can delay
461da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
471da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
481da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
491da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
501da177e4SLinus Torvalds 
511da177e4SLinus Torvalds 	 This qdisc does not do classification; that can be handled by
521da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
531da177e4SLinus Torvalds 	 control either, since that can be handled by using token
541da177e4SLinus Torvalds 	 bucket or other rate control.
55661b7972Sstephen hemminger 
56661b7972Sstephen hemminger      Correlated Loss Generator models
57661b7972Sstephen hemminger 
58661b7972Sstephen hemminger 	Added generation of correlated loss according to a 4-state Markov
59661b7972Sstephen hemminger 	model and the 2-state "Gilbert-Elliot" model.
60661b7972Sstephen hemminger 
61661b7972Sstephen hemminger 	References:
62661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
63661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
64661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
65661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
66661b7972Sstephen hemminger 
67661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
68661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
691da177e4SLinus Torvalds */
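/* Illustration (not part of the original file): from user space these knobs
 * are normally driven through tc(8).  Assuming a hypothetical interface
 * eth0, a typical invocation exercising the delay, jitter, correlation and
 * loss paths implemented below is
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 20ms 25% loss 1% 30%
 *
 * i.e. 100ms mean delay with 20ms jitter and 25% correlation between
 * consecutive delays, plus 1% random loss with 30% correlation between
 * consecutive loss decisions.
 */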
701da177e4SLinus Torvalds 
710a9fe5c3SYousuk Seung struct disttable {
720a9fe5c3SYousuk Seung 	u32  size;
730a9fe5c3SYousuk Seung 	s16 table[0];
740a9fe5c3SYousuk Seung };
750a9fe5c3SYousuk Seung 
761da177e4SLinus Torvalds struct netem_sched_data {
77aec0a40aSEric Dumazet 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
78aec0a40aSEric Dumazet 	struct rb_root t_root;
7950612537SEric Dumazet 
80d66280b1SPeter Oskolkov 	/* a linear queue; reduces rbtree rebalancing when jitter is low */
81d66280b1SPeter Oskolkov 	struct sk_buff	*t_head;
82d66280b1SPeter Oskolkov 	struct sk_buff	*t_tail;
83d66280b1SPeter Oskolkov 
8450612537SEric Dumazet 	/* optional qdisc for classful handling (NULL at netem init) */
851da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
8650612537SEric Dumazet 
8759cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
881da177e4SLinus Torvalds 
89112f9cb6SDave Taht 	s64 latency;
90112f9cb6SDave Taht 	s64 jitter;
91b407621cSStephen Hemminger 
921da177e4SLinus Torvalds 	u32 loss;
93e4ae004bSEric Dumazet 	u32 ecn;
941da177e4SLinus Torvalds 	u32 limit;
951da177e4SLinus Torvalds 	u32 counter;
961da177e4SLinus Torvalds 	u32 gap;
971da177e4SLinus Torvalds 	u32 duplicate;
980dca51d3SStephen Hemminger 	u32 reorder;
99c865e5d9SStephen Hemminger 	u32 corrupt;
1006a031f67SYang Yingliang 	u64 rate;
10190b41a1cSHagen Paul Pfeifer 	s32 packet_overhead;
10290b41a1cSHagen Paul Pfeifer 	u32 cell_size;
103809fa972SHannes Frederic Sowa 	struct reciprocal_value cell_size_reciprocal;
10490b41a1cSHagen Paul Pfeifer 	s32 cell_overhead;
1051da177e4SLinus Torvalds 
1061da177e4SLinus Torvalds 	struct crndstate {
107b407621cSStephen Hemminger 		u32 last;
108b407621cSStephen Hemminger 		u32 rho;
109c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
1101da177e4SLinus Torvalds 
1110a9fe5c3SYousuk Seung 	struct disttable *delay_dist;
112661b7972Sstephen hemminger 
113661b7972Sstephen hemminger 	enum  {
114661b7972Sstephen hemminger 		CLG_RANDOM,
115661b7972Sstephen hemminger 		CLG_4_STATES,
116661b7972Sstephen hemminger 		CLG_GILB_ELL,
117661b7972Sstephen hemminger 	} loss_model;
118661b7972Sstephen hemminger 
119a6e2fe17SYang Yingliang 	enum {
120a6e2fe17SYang Yingliang 		TX_IN_GAP_PERIOD = 1,
121a6e2fe17SYang Yingliang 		TX_IN_BURST_PERIOD,
122a6e2fe17SYang Yingliang 		LOST_IN_GAP_PERIOD,
123a6e2fe17SYang Yingliang 		LOST_IN_BURST_PERIOD,
124a6e2fe17SYang Yingliang 	} _4_state_model;
125a6e2fe17SYang Yingliang 
126c045a734SYang Yingliang 	enum {
127c045a734SYang Yingliang 		GOOD_STATE = 1,
128c045a734SYang Yingliang 		BAD_STATE,
129c045a734SYang Yingliang 	} GE_state_model;
130c045a734SYang Yingliang 
131661b7972Sstephen hemminger 	/* Correlated Loss Generation models */
132661b7972Sstephen hemminger 	struct clgstate {
133661b7972Sstephen hemminger 		/* state of the Markov chain */
134661b7972Sstephen hemminger 		u8 state;
135661b7972Sstephen hemminger 
136661b7972Sstephen hemminger 		/* 4-states and Gilbert-Elliot models */
137661b7972Sstephen hemminger 		u32 a1;	/* p13 for 4-states or p for GE */
138661b7972Sstephen hemminger 		u32 a2;	/* p31 for 4-states or r for GE */
139661b7972Sstephen hemminger 		u32 a3;	/* p32 for 4-states or h for GE */
140661b7972Sstephen hemminger 		u32 a4;	/* p14 for 4-states or 1-k for GE */
141661b7972Sstephen hemminger 		u32 a5; /* p23 used only in 4-states */
142661b7972Sstephen hemminger 	} clg;
143661b7972Sstephen hemminger 
144836af83bSDave Taht 	struct tc_netem_slot slot_config;
145836af83bSDave Taht 	struct slotstate {
146836af83bSDave Taht 		u64 slot_next;
147836af83bSDave Taht 		s32 packets_left;
148836af83bSDave Taht 		s32 bytes_left;
149836af83bSDave Taht 	} slot;
150836af83bSDave Taht 
1510a9fe5c3SYousuk Seung 	struct disttable *slot_dist;
1521da177e4SLinus Torvalds };
1531da177e4SLinus Torvalds 
15450612537SEric Dumazet /* Time stamp put into socket buffer control block
15550612537SEric Dumazet  * Only valid when skbs are in our internal t(ime)fifo queue.
15656b17425SEric Dumazet  *
15756b17425SEric Dumazet  * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
15856b17425SEric Dumazet  * and skb->next & skb->prev are scratch space for a qdisc,
15956b17425SEric Dumazet  * we save skb->tstamp value in skb->cb[] before destroying it.
16050612537SEric Dumazet  */
1611da177e4SLinus Torvalds struct netem_skb_cb {
162112f9cb6SDave Taht 	u64	        time_to_send;
1631da177e4SLinus Torvalds };
1641da177e4SLinus Torvalds 
1655f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
1665f86173bSJussi Kivilinna {
167aec0a40aSEric Dumazet 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
16816bda13dSDavid S. Miller 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
169175f9c1bSJussi Kivilinna 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
1705f86173bSJussi Kivilinna }
1715f86173bSJussi Kivilinna 
1721da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1731da177e4SLinus Torvalds  * Use entropy source for initial seed.
1741da177e4SLinus Torvalds  */
1751da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1761da177e4SLinus Torvalds {
1771da177e4SLinus Torvalds 	state->rho = rho;
17863862b5bSAruna-Hewapathirane 	state->last = prandom_u32();
1791da177e4SLinus Torvalds }
1801da177e4SLinus Torvalds 
1811da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1821da177e4SLinus Torvalds  * Next number depends on last value.
1831da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1841da177e4SLinus Torvalds  */
185b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1861da177e4SLinus Torvalds {
1871da177e4SLinus Torvalds 	u64 value, rho;
1881da177e4SLinus Torvalds 	unsigned long answer;
1891da177e4SLinus Torvalds 
1900a9fe5c3SYousuk Seung 	if (!state || state->rho == 0)	/* no correlation */
19163862b5bSAruna-Hewapathirane 		return prandom_u32();
1921da177e4SLinus Torvalds 
19363862b5bSAruna-Hewapathirane 	value = prandom_u32();
1941da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1951da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1961da177e4SLinus Torvalds 	state->last = answer;
1971da177e4SLinus Torvalds 	return answer;
1981da177e4SLinus Torvalds }
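/* In fixed point, with rho read as a fraction of 2^32, get_crandom() above
 * computes an exponentially weighted blend of the fresh random value and
 * the previous output:
 *
 *	answer = (1 - rho/2^32) * value + (rho/2^32) * last
 *
 * Worked example with illustrative numbers: rho = 0xC0000000 (75%
 * correlation), last = 0 and value = 0x80000000 give answer ~= 0x20000000,
 * i.e. the new sample is pulled three quarters of the way toward the
 * previous one.
 */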
1991da177e4SLinus Torvalds 
200661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
201661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
202661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
203661b7972Sstephen hemminger  */
204661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
205661b7972Sstephen hemminger {
206661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
20763862b5bSAruna-Hewapathirane 	u32 rnd = prandom_u32();
208661b7972Sstephen hemminger 
209661b7972Sstephen hemminger 	/*
21025985edcSLucas De Marchi 	 * Compares rnd with the transition probabilities out of the
211661b7972Sstephen hemminger 	 * current state, then decides the next state and whether the
212661b7972Sstephen hemminger 	 * next packet has to be transmitted or lost.
213661b7972Sstephen hemminger 	 * The four states correspond to:
214a6e2fe17SYang Yingliang 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
215a6e2fe17SYang Yingliang 	 *   LOST_IN_BURST_PERIOD => isolated losses within a gap period
216a6e2fe17SYang Yingliang 	 *   LOST_IN_GAP_PERIOD => lost packets within a burst period
217a6e2fe17SYang Yingliang 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
218661b7972Sstephen hemminger 	 */
219661b7972Sstephen hemminger 	switch (clg->state) {
220a6e2fe17SYang Yingliang 	case TX_IN_GAP_PERIOD:
221661b7972Sstephen hemminger 		if (rnd < clg->a4) {
222a6e2fe17SYang Yingliang 			clg->state = LOST_IN_BURST_PERIOD;
223661b7972Sstephen hemminger 			return true;
224ab6c27beSstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
225a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
226661b7972Sstephen hemminger 			return true;
227a6e2fe17SYang Yingliang 		} else if (clg->a1 + clg->a4 < rnd) {
228a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
229a6e2fe17SYang Yingliang 		}
230661b7972Sstephen hemminger 
231661b7972Sstephen hemminger 		break;
232a6e2fe17SYang Yingliang 	case TX_IN_BURST_PERIOD:
233661b7972Sstephen hemminger 		if (rnd < clg->a5) {
234a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
235661b7972Sstephen hemminger 			return true;
236a6e2fe17SYang Yingliang 		} else {
237a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
238a6e2fe17SYang Yingliang 		}
239661b7972Sstephen hemminger 
240661b7972Sstephen hemminger 		break;
241a6e2fe17SYang Yingliang 	case LOST_IN_GAP_PERIOD:
242661b7972Sstephen hemminger 		if (rnd < clg->a3)
243a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
244661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
245a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
246661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
247a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
248661b7972Sstephen hemminger 			return true;
249661b7972Sstephen hemminger 		}
250661b7972Sstephen hemminger 		break;
251a6e2fe17SYang Yingliang 	case LOST_IN_BURST_PERIOD:
252a6e2fe17SYang Yingliang 		clg->state = TX_IN_GAP_PERIOD;
253661b7972Sstephen hemminger 		break;
254661b7972Sstephen hemminger 	}
255661b7972Sstephen hemminger 
256661b7972Sstephen hemminger 	return false;
257661b7972Sstephen hemminger }
258661b7972Sstephen hemminger 
259661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
260661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
261661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
262661b7972Sstephen hemminger  *
26325985edcSLucas De Marchi  * Compares a random number with the transition probabilities out of
264661b7972Sstephen hemminger  * the current state, then decides the next state. A second random
26525985edcSLucas De Marchi  * number is drawn and compared with the loss probability of the
266661b7972Sstephen hemminger  * current state to decide whether the next packet will be
267661b7972Sstephen hemminger  * transmitted or lost.
268661b7972Sstephen hemminger  */
269661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
270661b7972Sstephen hemminger {
271661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
272661b7972Sstephen hemminger 
273661b7972Sstephen hemminger 	switch (clg->state) {
274c045a734SYang Yingliang 	case GOOD_STATE:
27563862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a1)
276c045a734SYang Yingliang 			clg->state = BAD_STATE;
27763862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a4)
278661b7972Sstephen hemminger 			return true;
2797c2781faSstephen hemminger 		break;
280c045a734SYang Yingliang 	case BAD_STATE:
28163862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a2)
282c045a734SYang Yingliang 			clg->state = GOOD_STATE;
28363862b5bSAruna-Hewapathirane 		if (prandom_u32() > clg->a3)
284661b7972Sstephen hemminger 			return true;
285661b7972Sstephen hemminger 	}
286661b7972Sstephen hemminger 
287661b7972Sstephen hemminger 	return false;
288661b7972Sstephen hemminger }
289661b7972Sstephen hemminger 
290661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
291661b7972Sstephen hemminger {
292661b7972Sstephen hemminger 	switch (q->loss_model) {
293661b7972Sstephen hemminger 	case CLG_RANDOM:
294661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
295661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
296661b7972Sstephen hemminger 
297661b7972Sstephen hemminger 	case CLG_4_STATES:
298661b7972Sstephen hemminger 		/* 4-state loss model algorithm (also used for the GI model):
299661b7972Sstephen hemminger 		 * ask the Markov 4-state loss generator whether the next
300661b7972Sstephen hemminger 		 * packet has to be dropped.
301661b7972Sstephen hemminger 		 */
303661b7972Sstephen hemminger 		return loss_4state(q);
304661b7972Sstephen hemminger 
305661b7972Sstephen hemminger 	case CLG_GILB_ELL:
306661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm:
307661b7972Sstephen hemminger 		 * ask the Gilbert-Elliot loss generator whether the next
308661b7972Sstephen hemminger 		 * packet has to be dropped.
309661b7972Sstephen hemminger 		 */
311661b7972Sstephen hemminger 		return loss_gilb_ell(q);
312661b7972Sstephen hemminger 	}
313661b7972Sstephen hemminger 
314661b7972Sstephen hemminger 	return false;	/* not reached */
315661b7972Sstephen hemminger }
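/* The probabilities compared against the 32-bit values from get_crandom()
 * and prandom_u32() above are scaled so that ~0U means 100%.  A user-space
 * sketch of the conversion (illustrative only, not part of this module;
 * the function name is made up, types come from <stdint.h>):
 *
 *	static uint32_t percent_to_netem_prob(double percent)
 *	{
 *		return (uint32_t)(percent / 100.0 * 4294967295.0);
 *	}
 *
 * so a requested loss of 1% reaches loss_event() above as roughly 42949672.
 */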
316661b7972Sstephen hemminger 
317661b7972Sstephen hemminger 
3181da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
3191da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
3201da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
3211da177e4SLinus Torvalds  */
3229b0ed891SStephen Hemminger static s64 tabledist(s64 mu, s32 sigma,
323b407621cSStephen Hemminger 		     struct crndstate *state,
324b407621cSStephen Hemminger 		     const struct disttable *dist)
3251da177e4SLinus Torvalds {
326112f9cb6SDave Taht 	s64 x;
327b407621cSStephen Hemminger 	long t;
328b407621cSStephen Hemminger 	u32 rnd;
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds 	if (sigma == 0)
3311da177e4SLinus Torvalds 		return mu;
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds 	rnd = get_crandom(state);
3341da177e4SLinus Torvalds 
3351da177e4SLinus Torvalds 	/* default uniform distribution */
3361da177e4SLinus Torvalds 	if (dist == NULL)
337043e337fSMd. Islam 		return ((rnd % (2 * sigma)) + mu) - sigma;
3381da177e4SLinus Torvalds 
3391da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
3401da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
3411da177e4SLinus Torvalds 	if (x >= 0)
3421da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
3431da177e4SLinus Torvalds 	else
3441da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3471da177e4SLinus Torvalds }
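/* The table entries are samples of the target distribution pre-scaled by
 * NETEM_DIST_SCALE, so the return value above is effectively
 *
 *	mu + (sigma * t) / NETEM_DIST_SCALE
 *
 * split into two terms to avoid overflow.  Worked example with illustrative
 * numbers (and assuming NETEM_DIST_SCALE == 8192, its value in the
 * pkt_sched UAPI header): mu = 100ms, sigma = 10ms and a table entry
 * t = 4096 give roughly 100ms + 10ms/2 = 105ms.
 */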
3481da177e4SLinus Torvalds 
349bce552fdSStephen Hemminger static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
3507bc0f28cSHagen Paul Pfeifer {
35190b41a1cSHagen Paul Pfeifer 	len += q->packet_overhead;
35290b41a1cSHagen Paul Pfeifer 
35390b41a1cSHagen Paul Pfeifer 	if (q->cell_size) {
35490b41a1cSHagen Paul Pfeifer 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
35590b41a1cSHagen Paul Pfeifer 
35690b41a1cSHagen Paul Pfeifer 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
35790b41a1cSHagen Paul Pfeifer 			cells++;
35890b41a1cSHagen Paul Pfeifer 		len = cells * (q->cell_size + q->cell_overhead);
35990b41a1cSHagen Paul Pfeifer 	}
360bce552fdSStephen Hemminger 
361bce552fdSStephen Hemminger 	return div64_u64(len * NSEC_PER_SEC, q->rate);
3627bc0f28cSHagen Paul Pfeifer }
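/* Worked example with illustrative numbers: q->rate is in bytes per second,
 * so a 1500 byte packet at rate = 125000 (1 Mbit/s) with no packet or cell
 * overhead occupies the link for 1500 * 10^9 / 125000 = 12,000,000 ns,
 * i.e. 12 ms.  With an ATM-like cell_size = 48 and cell_overhead = 5, a
 * 100 byte packet is first rounded up to 3 cells, 3 * (48 + 5) = 159 bytes,
 * before the same division.
 */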
3637bc0f28cSHagen Paul Pfeifer 
364ff704050Sstephen hemminger static void tfifo_reset(struct Qdisc *sch)
365ff704050Sstephen hemminger {
366ff704050Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3673aa605f2SEric Dumazet 	struct rb_node *p = rb_first(&q->t_root);
368ff704050Sstephen hemminger 
3693aa605f2SEric Dumazet 	while (p) {
37018a4c0eaSEric Dumazet 		struct sk_buff *skb = rb_to_skb(p);
371ff704050Sstephen hemminger 
3723aa605f2SEric Dumazet 		p = rb_next(p);
3733aa605f2SEric Dumazet 		rb_erase(&skb->rbnode, &q->t_root);
3742f08a9a1SEric Dumazet 		rtnl_kfree_skbs(skb, skb);
375ff704050Sstephen hemminger 	}
376d66280b1SPeter Oskolkov 
377d66280b1SPeter Oskolkov 	rtnl_kfree_skbs(q->t_head, q->t_tail);
378d66280b1SPeter Oskolkov 	q->t_head = NULL;
379d66280b1SPeter Oskolkov 	q->t_tail = NULL;
380ff704050Sstephen hemminger }
381ff704050Sstephen hemminger 
382960fb66eSEric Dumazet static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
38350612537SEric Dumazet {
384aec0a40aSEric Dumazet 	struct netem_sched_data *q = qdisc_priv(sch);
385112f9cb6SDave Taht 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
386d66280b1SPeter Oskolkov 
387d66280b1SPeter Oskolkov 	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
388d66280b1SPeter Oskolkov 		if (q->t_tail)
389d66280b1SPeter Oskolkov 			q->t_tail->next = nskb;
390d66280b1SPeter Oskolkov 		else
391d66280b1SPeter Oskolkov 			q->t_head = nskb;
392d66280b1SPeter Oskolkov 		q->t_tail = nskb;
393d66280b1SPeter Oskolkov 	} else {
394aec0a40aSEric Dumazet 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
39550612537SEric Dumazet 
396aec0a40aSEric Dumazet 		while (*p) {
397aec0a40aSEric Dumazet 			struct sk_buff *skb;
39850612537SEric Dumazet 
399aec0a40aSEric Dumazet 			parent = *p;
40018a4c0eaSEric Dumazet 			skb = rb_to_skb(parent);
40150612537SEric Dumazet 			if (tnext >= netem_skb_cb(skb)->time_to_send)
402aec0a40aSEric Dumazet 				p = &parent->rb_right;
403aec0a40aSEric Dumazet 			else
404aec0a40aSEric Dumazet 				p = &parent->rb_left;
40550612537SEric Dumazet 		}
40656b17425SEric Dumazet 		rb_link_node(&nskb->rbnode, parent, p);
40756b17425SEric Dumazet 		rb_insert_color(&nskb->rbnode, &q->t_root);
408d66280b1SPeter Oskolkov 	}
409aec0a40aSEric Dumazet 	sch->q.qlen++;
41050612537SEric Dumazet }
41150612537SEric Dumazet 
4126071bd1aSNeil Horman /* netem can't properly corrupt a megapacket (like we get from GSO), so when
4136071bd1aSNeil Horman  * we statistically choose to corrupt one, we instead segment it, returning
4146071bd1aSNeil Horman  * the first packet to be corrupted, and re-enqueue the remaining frames.
4156071bd1aSNeil Horman  */
416520ac30fSEric Dumazet static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
417520ac30fSEric Dumazet 				     struct sk_buff **to_free)
4186071bd1aSNeil Horman {
4196071bd1aSNeil Horman 	struct sk_buff *segs;
4206071bd1aSNeil Horman 	netdev_features_t features = netif_skb_features(skb);
4216071bd1aSNeil Horman 
4226071bd1aSNeil Horman 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
4236071bd1aSNeil Horman 
4246071bd1aSNeil Horman 	if (IS_ERR_OR_NULL(segs)) {
425520ac30fSEric Dumazet 		qdisc_drop(skb, sch, to_free);
4266071bd1aSNeil Horman 		return NULL;
4276071bd1aSNeil Horman 	}
4286071bd1aSNeil Horman 	consume_skb(skb);
4296071bd1aSNeil Horman 	return segs;
4306071bd1aSNeil Horman }
4316071bd1aSNeil Horman 
4320afb51e7SStephen Hemminger /*
4330afb51e7SStephen Hemminger  * Insert one skb into qdisc.
4340afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
4350afb51e7SStephen Hemminger  *	NET_XMIT_DROP: queue length didn't change.
4360afb51e7SStephen Hemminger  *	NET_XMIT_SUCCESS: one skb was queued.
4370afb51e7SStephen Hemminger  */
438520ac30fSEric Dumazet static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
439520ac30fSEric Dumazet 			 struct sk_buff **to_free)
4401da177e4SLinus Torvalds {
4411da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
44289e1df74SGuillaume Chazarain 	/* We don't fill cb now as skb_unshare() may invalidate it */
44389e1df74SGuillaume Chazarain 	struct netem_skb_cb *cb;
4440afb51e7SStephen Hemminger 	struct sk_buff *skb2;
4456071bd1aSNeil Horman 	struct sk_buff *segs = NULL;
4466071bd1aSNeil Horman 	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
4476071bd1aSNeil Horman 	int nb = 0;
4480afb51e7SStephen Hemminger 	int count = 1;
4496071bd1aSNeil Horman 	int rc = NET_XMIT_SUCCESS;
4505845f706SSheng Lan 	int rc_drop = NET_XMIT_DROP;
4511da177e4SLinus Torvalds 
4529410d386SChristoph Paasch 	/* Do not fool qdisc_drop_all() */
4539410d386SChristoph Paasch 	skb->prev = NULL;
4549410d386SChristoph Paasch 
4550afb51e7SStephen Hemminger 	/* Random duplication */
4560afb51e7SStephen Hemminger 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
4570afb51e7SStephen Hemminger 		++count;
4580afb51e7SStephen Hemminger 
459661b7972Sstephen hemminger 	/* Drop packet? */
460e4ae004bSEric Dumazet 	if (loss_event(q)) {
461e4ae004bSEric Dumazet 		if (q->ecn && INET_ECN_set_ce(skb))
46225331d6cSJohn Fastabend 			qdisc_qstats_drop(sch); /* mark packet */
463e4ae004bSEric Dumazet 		else
4640afb51e7SStephen Hemminger 			--count;
465e4ae004bSEric Dumazet 	}
4660afb51e7SStephen Hemminger 	if (count == 0) {
46725331d6cSJohn Fastabend 		qdisc_qstats_drop(sch);
468520ac30fSEric Dumazet 		__qdisc_drop(skb, to_free);
469c27f339aSJarek Poplawski 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
4701da177e4SLinus Torvalds 	}
4711da177e4SLinus Torvalds 
4725a308f40SEric Dumazet 	/* If a delay is expected, orphan the skb. (orphaning usually takes
4735a308f40SEric Dumazet 	 * place at TX completion time, so _before_ the link transit delay)
4745a308f40SEric Dumazet 	 */
4755080f39eSNik Unger 	if (q->latency || q->jitter || q->rate)
476f2f872f9SEric Dumazet 		skb_orphan_partial(skb);
4774e8a5201SDavid S. Miller 
4780afb51e7SStephen Hemminger 	/*
4790afb51e7SStephen Hemminger 	 * If we need to duplicate the packet, re-insert the copy at the top
4800afb51e7SStephen Hemminger 	 * of the qdisc tree, since the parent queuer expects that only one
4810afb51e7SStephen Hemminger 	 * skb will be queued.
482d5d75cd6SStephen Hemminger 	 */
4830afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
4847698b4fcSDavid S. Miller 		struct Qdisc *rootq = qdisc_root(sch);
4850afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
486d5d75cd6SStephen Hemminger 
487b396cca6SEric Dumazet 		q->duplicate = 0;
488520ac30fSEric Dumazet 		rootq->enqueue(skb2, rootq, to_free);
4890afb51e7SStephen Hemminger 		q->duplicate = dupsave;
4905845f706SSheng Lan 		rc_drop = NET_XMIT_SUCCESS;
4911da177e4SLinus Torvalds 	}
4921da177e4SLinus Torvalds 
493c865e5d9SStephen Hemminger 	/*
494c865e5d9SStephen Hemminger 	 * Randomized packet corruption.
495c865e5d9SStephen Hemminger 	 * Make a copy if needed since we are modifying the data.
496c865e5d9SStephen Hemminger 	 * If the packet is going to be hardware checksummed, then
497c865e5d9SStephen Hemminger 	 * checksum it now in software before we mangle it.
498c865e5d9SStephen Hemminger 	 */
499c865e5d9SStephen Hemminger 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
5006071bd1aSNeil Horman 		if (skb_is_gso(skb)) {
501520ac30fSEric Dumazet 			segs = netem_segment(skb, sch, to_free);
5026071bd1aSNeil Horman 			if (!segs)
5035845f706SSheng Lan 				return rc_drop;
5046071bd1aSNeil Horman 		} else {
5056071bd1aSNeil Horman 			segs = skb;
5066071bd1aSNeil Horman 		}
5076071bd1aSNeil Horman 
5086071bd1aSNeil Horman 		skb = segs;
5096071bd1aSNeil Horman 		segs = segs->next;
5106071bd1aSNeil Horman 
5118a6e9c67SEric Dumazet 		skb = skb_unshare(skb, GFP_ATOMIC);
5128a6e9c67SEric Dumazet 		if (unlikely(!skb)) {
5138a6e9c67SEric Dumazet 			qdisc_qstats_drop(sch);
5148a6e9c67SEric Dumazet 			goto finish_segs;
5158a6e9c67SEric Dumazet 		}
5168a6e9c67SEric Dumazet 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
5178a6e9c67SEric Dumazet 		    skb_checksum_help(skb)) {
5188a6e9c67SEric Dumazet 			qdisc_drop(skb, sch, to_free);
5196071bd1aSNeil Horman 			goto finish_segs;
5206071bd1aSNeil Horman 		}
521c865e5d9SStephen Hemminger 
52263862b5bSAruna-Hewapathirane 		skb->data[prandom_u32() % skb_headlen(skb)] ^=
52363862b5bSAruna-Hewapathirane 			1<<(prandom_u32() % 8);
524c865e5d9SStephen Hemminger 	}
525c865e5d9SStephen Hemminger 
5265845f706SSheng Lan 	if (unlikely(sch->q.qlen >= sch->limit)) {
5275845f706SSheng Lan 		qdisc_drop_all(skb, sch, to_free);
5285845f706SSheng Lan 		return rc_drop;
5295845f706SSheng Lan 	}
530960fb66eSEric Dumazet 
53125331d6cSJohn Fastabend 	qdisc_qstats_backlog_inc(sch, skb);
532960fb66eSEric Dumazet 
5335f86173bSJussi Kivilinna 	cb = netem_skb_cb(skb);
534f64f9e71SJoe Perches 	if (q->gap == 0 ||		/* not doing reordering */
535a42b4799SVijay Subramanian 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
536f64f9e71SJoe Perches 	    q->reorder < get_crandom(&q->reorder_cor)) {
537112f9cb6SDave Taht 		u64 now;
538112f9cb6SDave Taht 		s64 delay;
53907aaa115SStephen Hemminger 
54007aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
54107aaa115SStephen Hemminger 				  &q->delay_cor, q->delay_dist);
54207aaa115SStephen Hemminger 
543112f9cb6SDave Taht 		now = ktime_get_ns();
5447bc0f28cSHagen Paul Pfeifer 
5457bc0f28cSHagen Paul Pfeifer 		if (q->rate) {
5465080f39eSNik Unger 			struct netem_skb_cb *last = NULL;
5477bc0f28cSHagen Paul Pfeifer 
5485080f39eSNik Unger 			if (sch->q.tail)
5495080f39eSNik Unger 				last = netem_skb_cb(sch->q.tail);
5505080f39eSNik Unger 			if (q->t_root.rb_node) {
5515080f39eSNik Unger 				struct sk_buff *t_skb;
5525080f39eSNik Unger 				struct netem_skb_cb *t_last;
5535080f39eSNik Unger 
55418a4c0eaSEric Dumazet 				t_skb = skb_rb_last(&q->t_root);
5555080f39eSNik Unger 				t_last = netem_skb_cb(t_skb);
5565080f39eSNik Unger 				if (!last ||
557d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
5585080f39eSNik Unger 					last = t_last;
5595080f39eSNik Unger 			}
560d66280b1SPeter Oskolkov 			if (q->t_tail) {
561d66280b1SPeter Oskolkov 				struct netem_skb_cb *t_last =
562d66280b1SPeter Oskolkov 					netem_skb_cb(q->t_tail);
563d66280b1SPeter Oskolkov 
564d66280b1SPeter Oskolkov 				if (!last ||
565d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
566d66280b1SPeter Oskolkov 					last = t_last;
5675080f39eSNik Unger 			}
5685080f39eSNik Unger 
569aec0a40aSEric Dumazet 			if (last) {
5707bc0f28cSHagen Paul Pfeifer 				/*
571a13d3104SJohannes Naab 				 * The last packet in the queue is the reference
572a13d3104SJohannes Naab 				 * point (now); the time until it is sent is already
5737bc0f28cSHagen Paul Pfeifer 				 * part of the delay, so subtract it.
5747bc0f28cSHagen Paul Pfeifer 				 */
5755080f39eSNik Unger 				delay -= last->time_to_send - now;
576112f9cb6SDave Taht 				delay = max_t(s64, 0, delay);
5775080f39eSNik Unger 				now = last->time_to_send;
5787bc0f28cSHagen Paul Pfeifer 			}
579a13d3104SJohannes Naab 
580bce552fdSStephen Hemminger 			delay += packet_time_ns(qdisc_pkt_len(skb), q);
5817bc0f28cSHagen Paul Pfeifer 		}
5827bc0f28cSHagen Paul Pfeifer 
5837c59e25fSPatrick McHardy 		cb->time_to_send = now + delay;
5841da177e4SLinus Torvalds 		++q->counter;
585960fb66eSEric Dumazet 		tfifo_enqueue(skb, sch);
5861da177e4SLinus Torvalds 	} else {
5870dca51d3SStephen Hemminger 		/*
5880dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
5890dca51d3SStephen Hemminger 		 * of the queue.
5900dca51d3SStephen Hemminger 		 */
591112f9cb6SDave Taht 		cb->time_to_send = ktime_get_ns();
5920dca51d3SStephen Hemminger 		q->counter = 0;
5938ba25dadSJarek Poplawski 
59459697730SDavid S. Miller 		__qdisc_enqueue_head(skb, &sch->q);
595eb101924SHagen Paul Pfeifer 		sch->qstats.requeues++;
596378a2f09SJarek Poplawski 	}
5971da177e4SLinus Torvalds 
5986071bd1aSNeil Horman finish_segs:
5996071bd1aSNeil Horman 	if (segs) {
6006071bd1aSNeil Horman 		while (segs) {
6016071bd1aSNeil Horman 			skb2 = segs->next;
602a8305bffSDavid S. Miller 			skb_mark_not_on_list(segs);
6036071bd1aSNeil Horman 			qdisc_skb_cb(segs)->pkt_len = segs->len;
6046071bd1aSNeil Horman 			last_len = segs->len;
605520ac30fSEric Dumazet 			rc = qdisc_enqueue(segs, sch, to_free);
6066071bd1aSNeil Horman 			if (rc != NET_XMIT_SUCCESS) {
6076071bd1aSNeil Horman 				if (net_xmit_drop_count(rc))
6086071bd1aSNeil Horman 					qdisc_qstats_drop(sch);
6096071bd1aSNeil Horman 			} else {
6106071bd1aSNeil Horman 				nb++;
6116071bd1aSNeil Horman 				len += last_len;
6126071bd1aSNeil Horman 			}
6136071bd1aSNeil Horman 			segs = skb2;
6146071bd1aSNeil Horman 		}
6156071bd1aSNeil Horman 		sch->q.qlen += nb;
6166071bd1aSNeil Horman 		if (nb > 1)
6176071bd1aSNeil Horman 			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
6186071bd1aSNeil Horman 	}
61910f6dfcfSstephen hemminger 	return NET_XMIT_SUCCESS;
6201da177e4SLinus Torvalds }
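/* Illustration of the reordering branch above, using a hypothetical
 * interface name:
 *
 *	tc qdisc add dev eth0 root netem delay 10ms reorder 25% 50% gap 5
 *
 * keeps a counter of enqueued packets; every 5th packet is a candidate for
 * reordering and, with 25% probability (50% correlated), skips the delay
 * and is queued at the head, while all other packets take the 10ms delay.
 */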
6211da177e4SLinus Torvalds 
622836af83bSDave Taht /* Schedule the next transmission slot at a future time and reset the
623836af83bSDave Taht  * per-slot packet and byte budgets.
624836af83bSDave Taht  */
625836af83bSDave Taht 
626836af83bSDave Taht static void get_slot_next(struct netem_sched_data *q, u64 now)
627836af83bSDave Taht {
6280a9fe5c3SYousuk Seung 	s64 next_delay;
6290a9fe5c3SYousuk Seung 
6300a9fe5c3SYousuk Seung 	if (!q->slot_dist)
6310a9fe5c3SYousuk Seung 		next_delay = q->slot_config.min_delay +
632836af83bSDave Taht 				(prandom_u32() *
633836af83bSDave Taht 				 (q->slot_config.max_delay -
634836af83bSDave Taht 				  q->slot_config.min_delay) >> 32);
6350a9fe5c3SYousuk Seung 	else
6360a9fe5c3SYousuk Seung 		next_delay = tabledist(q->slot_config.dist_delay,
6370a9fe5c3SYousuk Seung 				       (s32)(q->slot_config.dist_jitter),
6380a9fe5c3SYousuk Seung 				       NULL, q->slot_dist);
6390a9fe5c3SYousuk Seung 
6400a9fe5c3SYousuk Seung 	q->slot.slot_next = now + next_delay;
641836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
642836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
643836af83bSDave Taht }
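/* When no slot distribution table is loaded, get_slot_next() above draws the
 * next slot start uniformly from [min_delay, max_delay] using the same
 * multiply-and-shift trick as get_crandom().  Worked example with
 * illustrative numbers: min_delay = 1ms, max_delay = 5ms and
 * prandom_u32() == 0x80000000 open the next slot about 1ms + 4ms/2 = 3ms
 * from now, with packets_left and bytes_left reset to the configured
 * per-slot budgets.
 */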
644836af83bSDave Taht 
645d66280b1SPeter Oskolkov static struct sk_buff *netem_peek(struct netem_sched_data *q)
646d66280b1SPeter Oskolkov {
647d66280b1SPeter Oskolkov 	struct sk_buff *skb = skb_rb_first(&q->t_root);
648d66280b1SPeter Oskolkov 	u64 t1, t2;
649d66280b1SPeter Oskolkov 
650d66280b1SPeter Oskolkov 	if (!skb)
651d66280b1SPeter Oskolkov 		return q->t_head;
652d66280b1SPeter Oskolkov 	if (!q->t_head)
653d66280b1SPeter Oskolkov 		return skb;
654d66280b1SPeter Oskolkov 
655d66280b1SPeter Oskolkov 	t1 = netem_skb_cb(skb)->time_to_send;
656d66280b1SPeter Oskolkov 	t2 = netem_skb_cb(q->t_head)->time_to_send;
657d66280b1SPeter Oskolkov 	if (t1 < t2)
658d66280b1SPeter Oskolkov 		return skb;
659d66280b1SPeter Oskolkov 	return q->t_head;
660d66280b1SPeter Oskolkov }
661d66280b1SPeter Oskolkov 
662d66280b1SPeter Oskolkov static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
663d66280b1SPeter Oskolkov {
664d66280b1SPeter Oskolkov 	if (skb == q->t_head) {
665d66280b1SPeter Oskolkov 		q->t_head = skb->next;
666d66280b1SPeter Oskolkov 		if (!q->t_head)
667d66280b1SPeter Oskolkov 			q->t_tail = NULL;
668d66280b1SPeter Oskolkov 	} else {
669d66280b1SPeter Oskolkov 		rb_erase(&skb->rbnode, &q->t_root);
670d66280b1SPeter Oskolkov 	}
671d66280b1SPeter Oskolkov }
672d66280b1SPeter Oskolkov 
6731da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
6741da177e4SLinus Torvalds {
6751da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6761da177e4SLinus Torvalds 	struct sk_buff *skb;
6771da177e4SLinus Torvalds 
67850612537SEric Dumazet tfifo_dequeue:
679ed760cb8SFlorian Westphal 	skb = __qdisc_dequeue_head(&sch->q);
680771018e7SStephen Hemminger 	if (skb) {
68125331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
6820ad2a836SBeshay, Joseph deliver:
683aec0a40aSEric Dumazet 		qdisc_bstats_update(sch, skb);
684aec0a40aSEric Dumazet 		return skb;
685aec0a40aSEric Dumazet 	}
686d66280b1SPeter Oskolkov 	skb = netem_peek(q);
687d66280b1SPeter Oskolkov 	if (skb) {
688112f9cb6SDave Taht 		u64 time_to_send;
689836af83bSDave Taht 		u64 now = ktime_get_ns();
69036b7bfe0SEric Dumazet 
6910f9f32acSStephen Hemminger 		/* is there still time remaining before it may be sent? */
69236b7bfe0SEric Dumazet 		time_to_send = netem_skb_cb(skb)->time_to_send;
693836af83bSDave Taht 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
694836af83bSDave Taht 			get_slot_next(q, now);
695aec0a40aSEric Dumazet 
696836af83bSDave Taht 		if (time_to_send <= now && q->slot.slot_next <= now) {
697d66280b1SPeter Oskolkov 			netem_erase_head(q, skb);
698aec0a40aSEric Dumazet 			sch->q.qlen--;
6990ad2a836SBeshay, Joseph 			qdisc_qstats_backlog_dec(sch, skb);
700aec0a40aSEric Dumazet 			skb->next = NULL;
701aec0a40aSEric Dumazet 			skb->prev = NULL;
702bffa72cfSEric Dumazet 			/* skb->dev shares skb->rbnode area,
703bffa72cfSEric Dumazet 			 * we need to restore its value.
704bffa72cfSEric Dumazet 			 */
705bffa72cfSEric Dumazet 			skb->dev = qdisc_dev(sch);
70603c05f0dSJarek Poplawski 
707836af83bSDave Taht 			if (q->slot.slot_next) {
708836af83bSDave Taht 				q->slot.packets_left--;
709836af83bSDave Taht 				q->slot.bytes_left -= qdisc_pkt_len(skb);
710836af83bSDave Taht 				if (q->slot.packets_left <= 0 ||
711836af83bSDave Taht 				    q->slot.bytes_left <= 0)
712836af83bSDave Taht 					get_slot_next(q, now);
713836af83bSDave Taht 			}
714836af83bSDave Taht 
71550612537SEric Dumazet 			if (q->qdisc) {
71621de12eeSEric Dumazet 				unsigned int pkt_len = qdisc_pkt_len(skb);
717520ac30fSEric Dumazet 				struct sk_buff *to_free = NULL;
718520ac30fSEric Dumazet 				int err;
71950612537SEric Dumazet 
720520ac30fSEric Dumazet 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
721520ac30fSEric Dumazet 				kfree_skb_list(to_free);
72221de12eeSEric Dumazet 				if (err != NET_XMIT_SUCCESS &&
72321de12eeSEric Dumazet 				    net_xmit_drop_count(err)) {
72425331d6cSJohn Fastabend 					qdisc_qstats_drop(sch);
7252ccccf5fSWANG Cong 					qdisc_tree_reduce_backlog(sch, 1,
72621de12eeSEric Dumazet 								  pkt_len);
72750612537SEric Dumazet 				}
72850612537SEric Dumazet 				goto tfifo_dequeue;
72950612537SEric Dumazet 			}
730aec0a40aSEric Dumazet 			goto deliver;
73111274e5aSStephen Hemminger 		}
73207aaa115SStephen Hemminger 
73350612537SEric Dumazet 		if (q->qdisc) {
73450612537SEric Dumazet 			skb = q->qdisc->ops->dequeue(q->qdisc);
73550612537SEric Dumazet 			if (skb)
73650612537SEric Dumazet 				goto deliver;
73750612537SEric Dumazet 		}
738836af83bSDave Taht 
739836af83bSDave Taht 		qdisc_watchdog_schedule_ns(&q->watchdog,
740836af83bSDave Taht 					   max(time_to_send,
741836af83bSDave Taht 					       q->slot.slot_next));
7420f9f32acSStephen Hemminger 	}
7430f9f32acSStephen Hemminger 
74450612537SEric Dumazet 	if (q->qdisc) {
74550612537SEric Dumazet 		skb = q->qdisc->ops->dequeue(q->qdisc);
74650612537SEric Dumazet 		if (skb)
74750612537SEric Dumazet 			goto deliver;
74850612537SEric Dumazet 	}
7490f9f32acSStephen Hemminger 	return NULL;
7501da177e4SLinus Torvalds }
7511da177e4SLinus Torvalds 
7521da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
7531da177e4SLinus Torvalds {
7541da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7551da177e4SLinus Torvalds 
75650612537SEric Dumazet 	qdisc_reset_queue(sch);
757ff704050Sstephen hemminger 	tfifo_reset(sch);
75850612537SEric Dumazet 	if (q->qdisc)
7591da177e4SLinus Torvalds 		qdisc_reset(q->qdisc);
76059cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
7611da177e4SLinus Torvalds }
7621da177e4SLinus Torvalds 
7636373a9a2Sstephen hemminger static void dist_free(struct disttable *d)
7646373a9a2Sstephen hemminger {
7654cb28970SWANG Cong 	kvfree(d);
7666373a9a2Sstephen hemminger }
7676373a9a2Sstephen hemminger 
7681da177e4SLinus Torvalds /*
7691da177e4SLinus Torvalds  * Distribution data is a variable-size payload containing
7701da177e4SLinus Torvalds  * signed 16-bit values.
7711da177e4SLinus Torvalds  */
772836af83bSDave Taht 
7730a9fe5c3SYousuk Seung static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
7740a9fe5c3SYousuk Seung 			  const struct nlattr *attr)
7751da177e4SLinus Torvalds {
7766373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
7771e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
7787698b4fcSDavid S. Miller 	spinlock_t *root_lock;
7791da177e4SLinus Torvalds 	struct disttable *d;
7801da177e4SLinus Torvalds 	int i;
7811da177e4SLinus Torvalds 
782df173bdaSstephen hemminger 	if (n > NETEM_DIST_MAX)
7831da177e4SLinus Torvalds 		return -EINVAL;
7841da177e4SLinus Torvalds 
785752ade68SMichal Hocko 	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
7861da177e4SLinus Torvalds 	if (!d)
7871da177e4SLinus Torvalds 		return -ENOMEM;
7881da177e4SLinus Torvalds 
7891da177e4SLinus Torvalds 	d->size = n;
7901da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
7911da177e4SLinus Torvalds 		d->table[i] = data[i];
7921da177e4SLinus Torvalds 
793102396aeSJarek Poplawski 	root_lock = qdisc_root_sleeping_lock(sch);
7947698b4fcSDavid S. Miller 
7957698b4fcSDavid S. Miller 	spin_lock_bh(root_lock);
7960a9fe5c3SYousuk Seung 	swap(*tbl, d);
7977698b4fcSDavid S. Miller 	spin_unlock_bh(root_lock);
798bb52c7acSEric Dumazet 
799bb52c7acSEric Dumazet 	dist_free(d);
8001da177e4SLinus Torvalds 	return 0;
8011da177e4SLinus Torvalds }
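/* Illustration (not part of the original file): the table normally comes
 * from one of the distribution files shipped with iproute2 (e.g.
 * normal.dist, pareto.dist, paretonormal.dist) and is selected from user
 * space with the "distribution" keyword, here on a hypothetical eth0:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 20ms distribution normal
 *
 * Each file is a list of signed 16-bit samples pre-scaled by
 * NETEM_DIST_SCALE, which tabledist() above combines with mu and sigma.
 */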
8021da177e4SLinus Torvalds 
803836af83bSDave Taht static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
804836af83bSDave Taht {
805836af83bSDave Taht 	const struct tc_netem_slot *c = nla_data(attr);
806836af83bSDave Taht 
807836af83bSDave Taht 	q->slot_config = *c;
808836af83bSDave Taht 	if (q->slot_config.max_packets == 0)
809836af83bSDave Taht 		q->slot_config.max_packets = INT_MAX;
810836af83bSDave Taht 	if (q->slot_config.max_bytes == 0)
811836af83bSDave Taht 		q->slot_config.max_bytes = INT_MAX;
812836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
813836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
8140a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
8150a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter)
816836af83bSDave Taht 		q->slot.slot_next = ktime_get_ns();
817836af83bSDave Taht 	else
818836af83bSDave Taht 		q->slot.slot_next = 0;
819836af83bSDave Taht }
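/* Illustration (hypothetical interface name; syntax as in the tc-netem man
 * page): slots are configured with
 *
 *	tc qdisc add dev eth0 root netem slot 800us 1ms packets 32 bytes 65536
 *
 * which releases traffic in bursts of at most 32 packets or 65536 bytes,
 * separated by a uniformly distributed 800us-1ms pause.
 */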
820836af83bSDave Taht 
82149545a77SYang Yingliang static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
8221da177e4SLinus Torvalds {
8231e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
8241da177e4SLinus Torvalds 
8251da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
8261da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
8271da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
8281da177e4SLinus Torvalds }
8291da177e4SLinus Torvalds 
83049545a77SYang Yingliang static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
8310dca51d3SStephen Hemminger {
8321e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
8330dca51d3SStephen Hemminger 
8340dca51d3SStephen Hemminger 	q->reorder = r->probability;
8350dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
8360dca51d3SStephen Hemminger }
8370dca51d3SStephen Hemminger 
83849545a77SYang Yingliang static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
839c865e5d9SStephen Hemminger {
8401e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
841c865e5d9SStephen Hemminger 
842c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
843c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
844c865e5d9SStephen Hemminger }
845c865e5d9SStephen Hemminger 
84649545a77SYang Yingliang static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
8477bc0f28cSHagen Paul Pfeifer {
8487bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
8497bc0f28cSHagen Paul Pfeifer 
8507bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
85190b41a1cSHagen Paul Pfeifer 	q->packet_overhead = r->packet_overhead;
85290b41a1cSHagen Paul Pfeifer 	q->cell_size = r->cell_size;
853809fa972SHannes Frederic Sowa 	q->cell_overhead = r->cell_overhead;
85490b41a1cSHagen Paul Pfeifer 	if (q->cell_size)
85590b41a1cSHagen Paul Pfeifer 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
856809fa972SHannes Frederic Sowa 	else
857809fa972SHannes Frederic Sowa 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
8587bc0f28cSHagen Paul Pfeifer }
8597bc0f28cSHagen Paul Pfeifer 
86049545a77SYang Yingliang static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
861661b7972Sstephen hemminger {
862661b7972Sstephen hemminger 	const struct nlattr *la;
863661b7972Sstephen hemminger 	int rem;
864661b7972Sstephen hemminger 
865661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
866661b7972Sstephen hemminger 		u16 type = nla_type(la);
867661b7972Sstephen hemminger 
868661b7972Sstephen hemminger 		switch (type) {
869661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
870661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
871661b7972Sstephen hemminger 
8722494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
873661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
874661b7972Sstephen hemminger 				return -EINVAL;
875661b7972Sstephen hemminger 			}
876661b7972Sstephen hemminger 
877661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
878661b7972Sstephen hemminger 
8793fbac2a8SYang Yingliang 			q->clg.state = TX_IN_GAP_PERIOD;
880661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
881661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
882661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
883661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
884661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
885661b7972Sstephen hemminger 			break;
886661b7972Sstephen hemminger 		}
887661b7972Sstephen hemminger 
888661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
889661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
890661b7972Sstephen hemminger 
8912494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
8922494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
893661b7972Sstephen hemminger 				return -EINVAL;
894661b7972Sstephen hemminger 			}
895661b7972Sstephen hemminger 
896661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
8973fbac2a8SYang Yingliang 			q->clg.state = GOOD_STATE;
898661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
899661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
900661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
901661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
902661b7972Sstephen hemminger 			break;
903661b7972Sstephen hemminger 		}
904661b7972Sstephen hemminger 
905661b7972Sstephen hemminger 		default:
906661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
907661b7972Sstephen hemminger 			return -EINVAL;
908661b7972Sstephen hemminger 		}
909661b7972Sstephen hemminger 	}
910661b7972Sstephen hemminger 
911661b7972Sstephen hemminger 	return 0;
912661b7972Sstephen hemminger }
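/* Illustration (hypothetical interface name): the nested NETEM_LOSS_GI and
 * NETEM_LOSS_GE attributes parsed above are produced by tc's "loss state"
 * and "loss gemodel" forms, e.g.
 *
 *	tc qdisc add dev eth0 root netem loss gemodel 1% 10%
 *
 * which selects CLG_GILB_ELL with p = 1% (good->bad transition probability)
 * and r = 10% (bad->good transition probability).
 */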
913661b7972Sstephen hemminger 
91427a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
91527a3421eSPatrick McHardy 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
91627a3421eSPatrick McHardy 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
91727a3421eSPatrick McHardy 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
9187bc0f28cSHagen Paul Pfeifer 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
919661b7972Sstephen hemminger 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
920e4ae004bSEric Dumazet 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
9216a031f67SYang Yingliang 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
92299803171SDave Taht 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
92399803171SDave Taht 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
924836af83bSDave Taht 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
92527a3421eSPatrick McHardy };
92627a3421eSPatrick McHardy 
9272c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
9282c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
9292c10b32bSThomas Graf {
9302c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
9312c10b32bSThomas Graf 
932661b7972Sstephen hemminger 	if (nested_len < 0) {
933661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
9342c10b32bSThomas Graf 		return -EINVAL;
935661b7972Sstephen hemminger 	}
936661b7972Sstephen hemminger 
9372c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
9388cb08174SJohannes Berg 		return nla_parse_deprecated(tb, maxtype,
9398cb08174SJohannes Berg 					    nla_data(nla) + NLA_ALIGN(len),
940fceb6435SJohannes Berg 					    nested_len, policy, NULL);
941661b7972Sstephen hemminger 
9422c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
9432c10b32bSThomas Graf 	return 0;
9442c10b32bSThomas Graf }
9452c10b32bSThomas Graf 
946c865e5d9SStephen Hemminger /* Parse netlink message to set options */
9472030721cSAlexander Aring static int netem_change(struct Qdisc *sch, struct nlattr *opt,
9482030721cSAlexander Aring 			struct netlink_ext_ack *extack)
9491da177e4SLinus Torvalds {
9501da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
951b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
9521da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
95354a4b05cSYang Yingliang 	struct clgstate old_clg;
95454a4b05cSYang Yingliang 	int old_loss_model = CLG_RANDOM;
9551da177e4SLinus Torvalds 	int ret;
9561da177e4SLinus Torvalds 
957b03f4672SPatrick McHardy 	if (opt == NULL)
9581da177e4SLinus Torvalds 		return -EINVAL;
9591da177e4SLinus Torvalds 
9602c10b32bSThomas Graf 	qopt = nla_data(opt);
9612c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
962b03f4672SPatrick McHardy 	if (ret < 0)
963b03f4672SPatrick McHardy 		return ret;
964b03f4672SPatrick McHardy 
96554a4b05cSYang Yingliang 	/* backup q->clg and q->loss_model */
96654a4b05cSYang Yingliang 	old_clg = q->clg;
96754a4b05cSYang Yingliang 	old_loss_model = q->loss_model;
96854a4b05cSYang Yingliang 
96954a4b05cSYang Yingliang 	if (tb[TCA_NETEM_LOSS]) {
97049545a77SYang Yingliang 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
97154a4b05cSYang Yingliang 		if (ret) {
97254a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
97354a4b05cSYang Yingliang 			return ret;
97454a4b05cSYang Yingliang 		}
97554a4b05cSYang Yingliang 	} else {
97654a4b05cSYang Yingliang 		q->loss_model = CLG_RANDOM;
97754a4b05cSYang Yingliang 	}
97854a4b05cSYang Yingliang 
97954a4b05cSYang Yingliang 	if (tb[TCA_NETEM_DELAY_DIST]) {
9800a9fe5c3SYousuk Seung 		ret = get_dist_table(sch, &q->delay_dist,
9810a9fe5c3SYousuk Seung 				     tb[TCA_NETEM_DELAY_DIST]);
9820a9fe5c3SYousuk Seung 		if (ret)
9830a9fe5c3SYousuk Seung 			goto get_table_failure;
98454a4b05cSYang Yingliang 	}
9850a9fe5c3SYousuk Seung 
9860a9fe5c3SYousuk Seung 	if (tb[TCA_NETEM_SLOT_DIST]) {
9870a9fe5c3SYousuk Seung 		ret = get_dist_table(sch, &q->slot_dist,
9880a9fe5c3SYousuk Seung 				     tb[TCA_NETEM_SLOT_DIST]);
9890a9fe5c3SYousuk Seung 		if (ret)
9900a9fe5c3SYousuk Seung 			goto get_table_failure;
99154a4b05cSYang Yingliang 	}
99254a4b05cSYang Yingliang 
99350612537SEric Dumazet 	sch->limit = qopt->limit;
9941da177e4SLinus Torvalds 
995112f9cb6SDave Taht 	q->latency = PSCHED_TICKS2NS(qopt->latency);
996112f9cb6SDave Taht 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
9971da177e4SLinus Torvalds 	q->limit = qopt->limit;
9981da177e4SLinus Torvalds 	q->gap = qopt->gap;
9990dca51d3SStephen Hemminger 	q->counter = 0;
10001da177e4SLinus Torvalds 	q->loss = qopt->loss;
10011da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
10021da177e4SLinus Torvalds 
1003bb2f8cc0SStephen Hemminger 	/* For compatibility with earlier versions:
1004bb2f8cc0SStephen Hemminger 	 * if gap is set, assume 100% reorder probability.
10050dca51d3SStephen Hemminger 	 */
1006a362e0a7SStephen Hemminger 	if (q->gap)
10070dca51d3SStephen Hemminger 		q->reorder = ~0;
10080dca51d3SStephen Hemminger 
1009265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
101049545a77SYang Yingliang 		get_correlation(q, tb[TCA_NETEM_CORR]);
10111da177e4SLinus Torvalds 
1012265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
101349545a77SYang Yingliang 		get_reorder(q, tb[TCA_NETEM_REORDER]);
10141da177e4SLinus Torvalds 
1015265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
101649545a77SYang Yingliang 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
10171da177e4SLinus Torvalds 
10187bc0f28cSHagen Paul Pfeifer 	if (tb[TCA_NETEM_RATE])
101949545a77SYang Yingliang 		get_rate(q, tb[TCA_NETEM_RATE]);
10207bc0f28cSHagen Paul Pfeifer 
10216a031f67SYang Yingliang 	if (tb[TCA_NETEM_RATE64])
10226a031f67SYang Yingliang 		q->rate = max_t(u64, q->rate,
10236a031f67SYang Yingliang 				nla_get_u64(tb[TCA_NETEM_RATE64]));
10246a031f67SYang Yingliang 
102599803171SDave Taht 	if (tb[TCA_NETEM_LATENCY64])
102699803171SDave Taht 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
102799803171SDave Taht 
102899803171SDave Taht 	if (tb[TCA_NETEM_JITTER64])
102999803171SDave Taht 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
103099803171SDave Taht 
1031e4ae004bSEric Dumazet 	if (tb[TCA_NETEM_ECN])
1032e4ae004bSEric Dumazet 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1033e4ae004bSEric Dumazet 
1034836af83bSDave Taht 	if (tb[TCA_NETEM_SLOT])
1035836af83bSDave Taht 		get_slot(q, tb[TCA_NETEM_SLOT]);
1036836af83bSDave Taht 
1037661b7972Sstephen hemminger 	return ret;
10380a9fe5c3SYousuk Seung 
10390a9fe5c3SYousuk Seung get_table_failure:
10400a9fe5c3SYousuk Seung 	/* recover clg and loss_model, in case
10410a9fe5c3SYousuk Seung 	 * q->clg and q->loss_model were modified
10420a9fe5c3SYousuk Seung 	 * in get_loss_clg()
10430a9fe5c3SYousuk Seung 	 */
10440a9fe5c3SYousuk Seung 	q->clg = old_clg;
10450a9fe5c3SYousuk Seung 	q->loss_model = old_loss_model;
10460a9fe5c3SYousuk Seung 	return ret;
10471da177e4SLinus Torvalds }
10481da177e4SLinus Torvalds 
1049e63d7dfdSAlexander Aring static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1050e63d7dfdSAlexander Aring 		      struct netlink_ext_ack *extack)
10511da177e4SLinus Torvalds {
10521da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10531da177e4SLinus Torvalds 	int ret;
10541da177e4SLinus Torvalds 
1055634576a1SNikolay Aleksandrov 	qdisc_watchdog_init(&q->watchdog, sch);
1056634576a1SNikolay Aleksandrov 
10571da177e4SLinus Torvalds 	if (!opt)
10581da177e4SLinus Torvalds 		return -EINVAL;
10591da177e4SLinus Torvalds 
1060661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
10612030721cSAlexander Aring 	ret = netem_change(sch, opt, extack);
106250612537SEric Dumazet 	if (ret)
1063250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
10641da177e4SLinus Torvalds 	return ret;
10651da177e4SLinus Torvalds }
10661da177e4SLinus Torvalds 
10671da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
10681da177e4SLinus Torvalds {
10691da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10701da177e4SLinus Torvalds 
107159cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
107250612537SEric Dumazet 	if (q->qdisc)
107386bd446bSVlad Buslov 		qdisc_put(q->qdisc);
10746373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
10750a9fe5c3SYousuk Seung 	dist_free(q->slot_dist);
10761da177e4SLinus Torvalds }
10771da177e4SLinus Torvalds 
1078661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
1079661b7972Sstephen hemminger 			   struct sk_buff *skb)
1080661b7972Sstephen hemminger {
1081661b7972Sstephen hemminger 	struct nlattr *nest;
1082661b7972Sstephen hemminger 
1083ae0be8deSMichal Kubecek 	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
1084661b7972Sstephen hemminger 	if (nest == NULL)
1085661b7972Sstephen hemminger 		goto nla_put_failure;
1086661b7972Sstephen hemminger 
1087661b7972Sstephen hemminger 	switch (q->loss_model) {
1088661b7972Sstephen hemminger 	case CLG_RANDOM:
1089661b7972Sstephen hemminger 		/* legacy loss model */
1090661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
1091661b7972Sstephen hemminger 		return 0;	/* no data */
1092661b7972Sstephen hemminger 
1093661b7972Sstephen hemminger 	case CLG_4_STATES: {
1094661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
1095661b7972Sstephen hemminger 			.p13 = q->clg.a1,
1096661b7972Sstephen hemminger 			.p31 = q->clg.a2,
1097661b7972Sstephen hemminger 			.p32 = q->clg.a3,
1098661b7972Sstephen hemminger 			.p14 = q->clg.a4,
1099661b7972Sstephen hemminger 			.p23 = q->clg.a5,
1100661b7972Sstephen hemminger 		};
1101661b7972Sstephen hemminger 
11021b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
11031b34ec43SDavid S. Miller 			goto nla_put_failure;
1104661b7972Sstephen hemminger 		break;
1105661b7972Sstephen hemminger 	}
1106661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
1107661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
1108661b7972Sstephen hemminger 			.p = q->clg.a1,
1109661b7972Sstephen hemminger 			.r = q->clg.a2,
1110661b7972Sstephen hemminger 			.h = q->clg.a3,
1111661b7972Sstephen hemminger 			.k1 = q->clg.a4,
1112661b7972Sstephen hemminger 		};
1113661b7972Sstephen hemminger 
11141b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
11151b34ec43SDavid S. Miller 			goto nla_put_failure;
1116661b7972Sstephen hemminger 		break;
1117661b7972Sstephen hemminger 	}
1118661b7972Sstephen hemminger 	}
1119661b7972Sstephen hemminger 
1120661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
1121661b7972Sstephen hemminger 	return 0;
1122661b7972Sstephen hemminger 
1123661b7972Sstephen hemminger nla_put_failure:
1124661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
1125661b7972Sstephen hemminger 	return -1;
1126661b7972Sstephen hemminger }
1127661b7972Sstephen hemminger 
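/* netem_dump - serialize the current configuration (delay, jitter, loss,
 * reordering, corruption, rate, ECN and slotting) into netlink attributes.
 */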
11281da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
11291da177e4SLinus Torvalds {
11301da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
1131861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
11321da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
11331da177e4SLinus Torvalds 	struct tc_netem_corr cor;
11340dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
1135c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
11367bc0f28cSHagen Paul Pfeifer 	struct tc_netem_rate rate;
1137836af83bSDave Taht 	struct tc_netem_slot slot;
11381da177e4SLinus Torvalds 
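	/* The legacy 32-bit fields carry delay/jitter in scheduler ticks,
	 * clamped to UINT_MAX; the full nanosecond values follow in the
	 * TCA_NETEM_LATENCY64/TCA_NETEM_JITTER64 attributes below.
	 */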
1139112f9cb6SDave Taht 	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
1140112f9cb6SDave Taht 			     UINT_MAX);
1141112f9cb6SDave Taht 	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
1142112f9cb6SDave Taht 			    UINT_MAX);
11431da177e4SLinus Torvalds 	qopt.limit = q->limit;
11441da177e4SLinus Torvalds 	qopt.loss = q->loss;
11451da177e4SLinus Torvalds 	qopt.gap = q->gap;
11461da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
11471b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
11481b34ec43SDavid S. Miller 		goto nla_put_failure;
11491da177e4SLinus Torvalds 
115099803171SDave Taht 	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
115199803171SDave Taht 		goto nla_put_failure;
115299803171SDave Taht 
115399803171SDave Taht 	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
115499803171SDave Taht 		goto nla_put_failure;
115599803171SDave Taht 
11561da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
11571da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
11581da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
11591b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
11601b34ec43SDavid S. Miller 		goto nla_put_failure;
11610dca51d3SStephen Hemminger 
11620dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
11630dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
11641b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
11651b34ec43SDavid S. Miller 		goto nla_put_failure;
11660dca51d3SStephen Hemminger 
1167c865e5d9SStephen Hemminger 	corrupt.probability = q->corrupt;
1168c865e5d9SStephen Hemminger 	corrupt.correlation = q->corrupt_cor.rho;
11691b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
11701b34ec43SDavid S. Miller 		goto nla_put_failure;
1171c865e5d9SStephen Hemminger 
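	/* Rates that do not fit in 32 bits go out via TCA_NETEM_RATE64;
	 * the legacy 32-bit field is then pegged at ~0U.
	 */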
11726a031f67SYang Yingliang 	if (q->rate >= (1ULL << 32)) {
11732a51c1e8SNicolas Dichtel 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
11742a51c1e8SNicolas Dichtel 				      TCA_NETEM_PAD))
11756a031f67SYang Yingliang 			goto nla_put_failure;
11766a031f67SYang Yingliang 		rate.rate = ~0U;
11776a031f67SYang Yingliang 	} else {
11787bc0f28cSHagen Paul Pfeifer 		rate.rate = q->rate;
11796a031f67SYang Yingliang 	}
118090b41a1cSHagen Paul Pfeifer 	rate.packet_overhead = q->packet_overhead;
118190b41a1cSHagen Paul Pfeifer 	rate.cell_size = q->cell_size;
118290b41a1cSHagen Paul Pfeifer 	rate.cell_overhead = q->cell_overhead;
11831b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
11841b34ec43SDavid S. Miller 		goto nla_put_failure;
11857bc0f28cSHagen Paul Pfeifer 
1186e4ae004bSEric Dumazet 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1187e4ae004bSEric Dumazet 		goto nla_put_failure;
1188e4ae004bSEric Dumazet 
1189661b7972Sstephen hemminger 	if (dump_loss_model(q, skb) != 0)
1190661b7972Sstephen hemminger 		goto nla_put_failure;
1191661b7972Sstephen hemminger 
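	/* Only emit TCA_NETEM_SLOT when a slot delay or jitter is configured;
	 * INT_MAX packet/byte limits are mapped back to 0 for the netlink
	 * encoding.
	 */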
11920a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
11930a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter) {
1194836af83bSDave Taht 		slot = q->slot_config;
1195836af83bSDave Taht 		if (slot.max_packets == INT_MAX)
1196836af83bSDave Taht 			slot.max_packets = 0;
1197836af83bSDave Taht 		if (slot.max_bytes == INT_MAX)
1198836af83bSDave Taht 			slot.max_bytes = 0;
1199836af83bSDave Taht 		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1200836af83bSDave Taht 			goto nla_put_failure;
1201836af83bSDave Taht 	}
1202836af83bSDave Taht 
1203861d7f74Sstephen hemminger 	return nla_nest_end(skb, nla);
12041da177e4SLinus Torvalds 
12051e90474cSPatrick McHardy nla_put_failure:
1206861d7f74Sstephen hemminger 	nlmsg_trim(skb, nla);
12071da177e4SLinus Torvalds 	return -1;
12081da177e4SLinus Torvalds }
12091da177e4SLinus Torvalds 
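/* netem_dump_class - netem exposes exactly one class (minor 1) whose
 * leaf is the child qdisc.
 */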
121010f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
121110f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
121210f6dfcfSstephen hemminger {
121310f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
121410f6dfcfSstephen hemminger 
121550612537SEric Dumazet 	if (cl != 1 || !q->qdisc) 	/* only one class */
121610f6dfcfSstephen hemminger 		return -ENOENT;
121710f6dfcfSstephen hemminger 
121810f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
121910f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
122010f6dfcfSstephen hemminger 
122110f6dfcfSstephen hemminger 	return 0;
122210f6dfcfSstephen hemminger }
122310f6dfcfSstephen hemminger 
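/* netem_graft - replace the single child qdisc, returning the old one
 * through @old.
 */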
122410f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1225653d6fd6SAlexander Aring 		     struct Qdisc **old, struct netlink_ext_ack *extack)
122610f6dfcfSstephen hemminger {
122710f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
122810f6dfcfSstephen hemminger 
122986a7996cSWANG Cong 	*old = qdisc_replace(sch, new, &q->qdisc);
123010f6dfcfSstephen hemminger 	return 0;
123110f6dfcfSstephen hemminger }
123210f6dfcfSstephen hemminger 
123310f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
123410f6dfcfSstephen hemminger {
123510f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
123610f6dfcfSstephen hemminger 	return q->qdisc;
123710f6dfcfSstephen hemminger }
123810f6dfcfSstephen hemminger 
1239143976ceSWANG Cong static unsigned long netem_find(struct Qdisc *sch, u32 classid)
124010f6dfcfSstephen hemminger {
124110f6dfcfSstephen hemminger 	return 1;
124210f6dfcfSstephen hemminger }
124310f6dfcfSstephen hemminger 
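/* netem_walk - iterate over the one and only class exposed by netem. */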
124410f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
124510f6dfcfSstephen hemminger {
124610f6dfcfSstephen hemminger 	if (!walker->stop) {
124710f6dfcfSstephen hemminger 		if (walker->count >= walker->skip)
124810f6dfcfSstephen hemminger 			if (walker->fn(sch, 1, walker) < 0) {
124910f6dfcfSstephen hemminger 				walker->stop = 1;
125010f6dfcfSstephen hemminger 				return;
125110f6dfcfSstephen hemminger 			}
125210f6dfcfSstephen hemminger 		walker->count++;
125310f6dfcfSstephen hemminger 	}
125410f6dfcfSstephen hemminger }
125510f6dfcfSstephen hemminger 
125610f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = {
125710f6dfcfSstephen hemminger 	.graft		=	netem_graft,
125810f6dfcfSstephen hemminger 	.leaf		=	netem_leaf,
1259143976ceSWANG Cong 	.find		=	netem_find,
126010f6dfcfSstephen hemminger 	.walk		=	netem_walk,
126110f6dfcfSstephen hemminger 	.dump		=	netem_dump_class,
126210f6dfcfSstephen hemminger };
126310f6dfcfSstephen hemminger 
126420fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
12651da177e4SLinus Torvalds 	.id		=	"netem",
126610f6dfcfSstephen hemminger 	.cl_ops		=	&netem_class_ops,
12671da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
12681da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
12691da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
127077be155cSJarek Poplawski 	.peek		=	qdisc_peek_dequeued,
12711da177e4SLinus Torvalds 	.init		=	netem_init,
12721da177e4SLinus Torvalds 	.reset		=	netem_reset,
12731da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
12741da177e4SLinus Torvalds 	.change		=	netem_change,
12751da177e4SLinus Torvalds 	.dump		=	netem_dump,
12761da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
12771da177e4SLinus Torvalds };
12781da177e4SLinus Torvalds 
12791da177e4SLinus Torvalds 
12801da177e4SLinus Torvalds static int __init netem_module_init(void)
12811da177e4SLinus Torvalds {
1282eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
12831da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
12841da177e4SLinus Torvalds }
12851da177e4SLinus Torvalds static void __exit netem_module_exit(void)
12861da177e4SLinus Torvalds {
12871da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
12881da177e4SLinus Torvalds }
12891da177e4SLinus Torvalds module_init(netem_module_init)
12901da177e4SLinus Torvalds module_exit(netem_module_exit)
12911da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1292