/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
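
/* A minimal, illustrative tc(8) usage sketch (not part of this file; the
 * device name and numbers are made up):
 *
 *   # 100ms +/- 10ms delay with 25% delay correlation, 0.3% random loss
 *   tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.3%
 *
 *   # table-driven jitter using a distribution table shipped with iproute2
 *   tc qdisc add dev eth0 root netem delay 100ms 20ms distribution normal
 *
 * Rate control would be done by stacking e.g. TBF, per the note above.
 */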

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	s64 latency;
	s64 jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	} _4_state_model;

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	} GE_state_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

	struct tc_netem_slot slot_config;
	struct slotstate {
		u64 slot_next;
		s32 packets_left;
		s32 bytes_left;
	} slot;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save the skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	u64	        time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = prandom_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return prandom_u32();

	value = prandom_u32();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
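
/* Worked equation for get_crandom() above: with r = rho / 2^32 (rho is the
 * userspace correlation scaled to 32-bit fixed point), the update is
 * approximately
 *
 *	answer = (1 - r) * uniform + r * last
 *
 * i.e. an exponentially-weighted blend of fresh entropy and the previous
 * output. Illustrative numbers: rho = 2^31 (50% correlation) gives
 * answer ~= (uniform + last) / 2, so successive values drift rather than
 * jump.
 */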

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = prandom_u32();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_BURST_PERIOD => isolated losses within a gap period
	 *   LOST_IN_GAP_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}

		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}

		break;
	case LOST_IN_GAP_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		}
		break;
	case LOST_IN_BURST_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}
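
/* Transition summary for loss_4state(), read directly from the code above
 * (a1..a5 are p13, p31, p32, p14, p23 as u32 fractions of 2^32):
 *
 *   TX_IN_GAP_PERIOD:     rnd < a4       -> LOST_IN_BURST_PERIOD, drop
 *                         a4..a1+a4      -> LOST_IN_GAP_PERIOD,   drop
 *                         otherwise      -> stay,                 transmit
 *   TX_IN_BURST_PERIOD:   rnd < a5       -> LOST_IN_GAP_PERIOD,   drop
 *                         otherwise      -> stay,                 transmit
 *   LOST_IN_GAP_PERIOD:   rnd < a3       -> TX_IN_BURST_PERIOD,   transmit
 *                         a3..a2+a3      -> TX_IN_GAP_PERIOD,     transmit
 *                         otherwise      -> stay,                 drop
 *   LOST_IN_BURST_PERIOD: (always)       -> TX_IN_GAP_PERIOD,     transmit
 */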

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case GOOD_STATE:
		if (prandom_u32() < clg->a1)
			clg->state = BAD_STATE;
		if (prandom_u32() < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (prandom_u32() < clg->a2)
			clg->state = GOOD_STATE;
		if (prandom_u32() > clg->a3)
			return true;
	}

	return false;
}
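
/* Parameter sketch for loss_gilb_ell(), hedged from the usual model
 * descriptions (only the a1..a4 mapping is guaranteed by the comments in
 * struct clgstate): a packet is dropped with probability 1-k (= a4) in
 * GOOD_STATE and with probability 1-h (the code tests rnd > a3) in
 * BAD_STATE. Setting 1-k = 0 yields the classic Gilbert model (the good
 * state is loss-free); additionally setting h = 0 yields the Simple
 * Gilbert model (every packet in the bad state is lost).
 */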

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4state loss model algorithm (used also for GI model)
		* Extracts a value from the Markov 4-state loss generator;
		* a packet is dropped when it returns true, and if needed
		* the event is written to the kernel logs
		*/
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm
		* Extracts a value from the Gilbert-Elliot loss generator;
		* a packet is dropped when it returns true, and if needed
		* the event is written to the kernel logs
		*/
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static s64 tabledist(s64 mu, s32 sigma,
		     struct crndstate *state,
		     const struct disttable *dist)
{
	s64 x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2 * sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
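
/* Worked example for tabledist(): with no table, the result is uniform in
 * [mu - sigma, mu + sigma). With a table, an entry t is a deviate in units
 * of sigma / NETEM_DIST_SCALE (8192), so the two-part sum above computes
 * roughly mu + t * sigma / 8192 without 64-bit overflow. Illustrative
 * numbers: mu = 100ms, sigma = 10ms, t = 8192 -> ~110ms; t = -4096 -> ~95ms.
 */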

static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	return div64_u64(len * NSEC_PER_SEC, q->rate);
}
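
/* Worked example for packet_time_ns(): q->rate is in bytes per second, so a
 * 1500-byte packet at rate 125000 (1 Mbit/s) costs 1500 * 1e9 / 125000 =
 * 12,000,000 ns = 12 ms on the emulated link. With ATM-like framing
 * (cell_size = 48, cell_overhead = 5; illustrative values), a 100-byte
 * packet rounds up to 3 cells and is billed as 3 * 53 = 159 bytes.
 */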

static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first(&q->t_root);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);
		rb_erase(&skb->rbnode, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	u64 tnext = netem_skb_cb(nskb)->time_to_send;
	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (tnext >= netem_skb_cb(skb)->time_to_send)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color(&nskb->rbnode, &q->t_root);
	sch->q.qlen++;
}
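
/* Note on tfifo_enqueue(): the rbtree is keyed on time_to_send, making
 * insertion O(log n) even when jitter produces out-of-order departure
 * times. Because equal keys descend to the right (tnext >= ...), packets
 * that share a send time keep their FIFO arrival order.
 */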

/* netem can't properly corrupt a megapacket (like we get from GSO), so
 * when we statistically choose to corrupt one, we instead segment it,
 * returning the first packet to be corrupted, and re-enqueue the remaining
 * frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}

static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
{
	skb->next = qh->head;

	if (!qh->head)
		qh->tail = skb;
	qh->head = skb;
	qh->qlen++;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	struct sk_buff *segs = NULL;
	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
	int nb = 0;
	int count = 1;
	int rc = NET_XMIT_SUCCESS;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying it.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (skb_is_gso(skb)) {
			segs = netem_segment(skb, sch, to_free);
			if (!segs)
				return NET_XMIT_DROP;
		} else {
			segs = skb;
		}

		skb = segs;
		segs = segs->next;

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			goto finish_segs;
		}

		skb->data[prandom_u32() % skb_headlen(skb)] ^=
			1<<(prandom_u32() % 8);
	}

	if (unlikely(sch->q.qlen >= sch->limit))
		return qdisc_drop(skb, sch, to_free);

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		u64 now;
		s64 delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = ktime_get_ns();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send) {
					last = t_last;
				}
			}

			if (last) {
				/*
				 * The last packet in the queue is the
				 * reference point (now). The wait until it
				 * leaves already counts toward this packet's
				 * delay, so subtract it.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(s64, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_time_ns(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = ktime_get_ns();
		q->counter = 0;

		netem_enqueue_skb_head(&sch->q, skb);
		sch->qstats.requeues++;
	}

finish_segs:
	if (segs) {
		while (segs) {
			skb2 = segs->next;
			segs->next = NULL;
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		sch->q.qlen += nb;
		if (nb > 1)
			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	}
	return NET_XMIT_SUCCESS;
}

/* Delay the next round by starting a new slot at a future time,
 * with a fresh budget of bytes and packets.
 */

static void get_slot_next(struct netem_sched_data *q, u64 now)
{
	q->slot.slot_next = now + q->slot_config.min_delay +
		(prandom_u32() *
			(q->slot_config.max_delay -
				q->slot_config.min_delay) >> 32);
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
}
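
/* Fixed-point note for get_slot_next(): prandom_u32() / 2^32 is a uniform
 * fraction in [0, 1), so the slot start lands uniformly in
 * [now + min_delay, now + max_delay). Illustrative numbers: min_delay = 1ms
 * and max_delay = 3ms mean the next slot opens 1-3ms from now, and up to
 * max_packets/max_bytes may pass before the slot closes.
 */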

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	struct rb_node *p;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
		qdisc_qstats_backlog_dec(sch, skb);
deliver:
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	p = rb_first(&q->t_root);
	if (p) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		skb = rb_to_skb(p);

		/* is it time to send this packet? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now &&  q->slot.slot_next <= now) {
			rb_erase(p, &q->t_root);
			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (skb->tc_redirected && skb->tc_from_ingress)
				skb->tstamp = 0;
#endif

			if (q->slot.slot_next) {
				q->slot.packets_left--;
				q->slot.bytes_left -= qdisc_pkt_len(skb);
				if (q->slot.packets_left <= 0 ||
				    q->slot.bytes_left <= 0)
					get_slot_next(q, now);
			}

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS &&
				    net_xmit_drop_count(err)) {
					qdisc_qstats_drop(sch);
					qdisc_tree_reduce_backlog(sch, 1,
								  pkt_len);
				}
				goto tfifo_dequeue;
			}
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   max(time_to_send,
					       q->slot.slot_next));
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}
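
/* Dequeue order, summarized from the code above: (1) anything already in
 * sch->q (packets reordered to the front); (2) the earliest rbtree entry,
 * once both its time_to_send and the current slot allow it, handed to the
 * child qdisc when one is attached; (3) the child qdisc itself; otherwise
 * arm the watchdog for max(time_to_send, slot_next) and return NULL.
 */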

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */

static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}
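
/* Provenance note (hedged; not part of this file): the table passed in
 * TCA_NETEM_DELAY_DIST is normally one of the .dist files shipped with
 * iproute2 (normal, pareto, paretonormal, experimental), generated by the
 * maketable tool in iproute2's netem/ directory from measured or synthetic
 * delay data. Each s16 entry feeds tabledist() as a deviate in units of
 * sigma / NETEM_DIST_SCALE.
 */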

static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_slot *c = nla_data(attr);

	q->slot_config = *c;
	if (q->slot_config.max_packets == 0)
		q->slot_config.max_packets = INT_MAX;
	if (q->slot_config.max_bytes == 0)
		q->slot_config.max_bytes = INT_MAX;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
	if (q->slot_config.min_delay | q->slot_config.max_delay)
		q->slot.slot_next = ktime_get_ns();
	else
		q->slot.slot_next = 0;
}

static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}

static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = TX_IN_GAP_PERIOD;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = GOOD_STATE;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy, NULL);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}
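
/* Message layout handled by parse_attr(), read from the code above: the
 * TCA_OPTIONS payload starts with a fixed struct tc_netem_qopt (the legacy
 * ABI), and any newer TCA_NETEM_* attributes follow it at the next
 * NLA_ALIGN() boundary:
 *
 *   [ struct tc_netem_qopt ][ TCA_NETEM_CORR ][ TCA_NETEM_RATE64 ] ...
 *
 * so old userspace that sends only the struct still parses cleanly.
 */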
8942c10b32bSThomas Graf 
895c865e5d9SStephen Hemminger /* Parse netlink message to set options */
8962030721cSAlexander Aring static int netem_change(struct Qdisc *sch, struct nlattr *opt,
8972030721cSAlexander Aring 			struct netlink_ext_ack *extack)
8981da177e4SLinus Torvalds {
8991da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
900b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
9011da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
90254a4b05cSYang Yingliang 	struct clgstate old_clg;
90354a4b05cSYang Yingliang 	int old_loss_model = CLG_RANDOM;
9041da177e4SLinus Torvalds 	int ret;
9051da177e4SLinus Torvalds 
906b03f4672SPatrick McHardy 	if (opt == NULL)
9071da177e4SLinus Torvalds 		return -EINVAL;
9081da177e4SLinus Torvalds 
9092c10b32bSThomas Graf 	qopt = nla_data(opt);
9102c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
911b03f4672SPatrick McHardy 	if (ret < 0)
912b03f4672SPatrick McHardy 		return ret;
913b03f4672SPatrick McHardy 
91454a4b05cSYang Yingliang 	/* backup q->clg and q->loss_model */
91554a4b05cSYang Yingliang 	old_clg = q->clg;
91654a4b05cSYang Yingliang 	old_loss_model = q->loss_model;
91754a4b05cSYang Yingliang 
91854a4b05cSYang Yingliang 	if (tb[TCA_NETEM_LOSS]) {
91949545a77SYang Yingliang 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
92054a4b05cSYang Yingliang 		if (ret) {
92154a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
92254a4b05cSYang Yingliang 			return ret;
92354a4b05cSYang Yingliang 		}
92454a4b05cSYang Yingliang 	} else {
92554a4b05cSYang Yingliang 		q->loss_model = CLG_RANDOM;
92654a4b05cSYang Yingliang 	}
92754a4b05cSYang Yingliang 
92854a4b05cSYang Yingliang 	if (tb[TCA_NETEM_DELAY_DIST]) {
92954a4b05cSYang Yingliang 		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
93054a4b05cSYang Yingliang 		if (ret) {
93154a4b05cSYang Yingliang 			/* recover clg and loss_model, in case of
93254a4b05cSYang Yingliang 			 * q->clg and q->loss_model were modified
93354a4b05cSYang Yingliang 			 * in get_loss_clg()
93454a4b05cSYang Yingliang 			 */
93554a4b05cSYang Yingliang 			q->clg = old_clg;
93654a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
93754a4b05cSYang Yingliang 			return ret;
93854a4b05cSYang Yingliang 		}
93954a4b05cSYang Yingliang 	}
94054a4b05cSYang Yingliang 
94150612537SEric Dumazet 	sch->limit = qopt->limit;
9421da177e4SLinus Torvalds 
943112f9cb6SDave Taht 	q->latency = PSCHED_TICKS2NS(qopt->latency);
944112f9cb6SDave Taht 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
9451da177e4SLinus Torvalds 	q->limit = qopt->limit;
9461da177e4SLinus Torvalds 	q->gap = qopt->gap;
9470dca51d3SStephen Hemminger 	q->counter = 0;
9481da177e4SLinus Torvalds 	q->loss = qopt->loss;
9491da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
9501da177e4SLinus Torvalds 
951bb2f8cc0SStephen Hemminger 	/* for compatibility with earlier versions.
952bb2f8cc0SStephen Hemminger 	 * if gap is set, need to assume 100% probability
9530dca51d3SStephen Hemminger 	 */
954a362e0a7SStephen Hemminger 	if (q->gap)
9550dca51d3SStephen Hemminger 		q->reorder = ~0;
9560dca51d3SStephen Hemminger 
957265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
95849545a77SYang Yingliang 		get_correlation(q, tb[TCA_NETEM_CORR]);
9591da177e4SLinus Torvalds 
960265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
96149545a77SYang Yingliang 		get_reorder(q, tb[TCA_NETEM_REORDER]);
9621da177e4SLinus Torvalds 
963265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
96449545a77SYang Yingliang 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
9651da177e4SLinus Torvalds 
9667bc0f28cSHagen Paul Pfeifer 	if (tb[TCA_NETEM_RATE])
96749545a77SYang Yingliang 		get_rate(q, tb[TCA_NETEM_RATE]);
9687bc0f28cSHagen Paul Pfeifer 
9696a031f67SYang Yingliang 	if (tb[TCA_NETEM_RATE64])
9706a031f67SYang Yingliang 		q->rate = max_t(u64, q->rate,
9716a031f67SYang Yingliang 				nla_get_u64(tb[TCA_NETEM_RATE64]));
9726a031f67SYang Yingliang 
97399803171SDave Taht 	if (tb[TCA_NETEM_LATENCY64])
97499803171SDave Taht 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
97599803171SDave Taht 
97699803171SDave Taht 	if (tb[TCA_NETEM_JITTER64])
97799803171SDave Taht 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
97899803171SDave Taht 
979e4ae004bSEric Dumazet 	if (tb[TCA_NETEM_ECN])
980e4ae004bSEric Dumazet 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
981e4ae004bSEric Dumazet 
982836af83bSDave Taht 	if (tb[TCA_NETEM_SLOT])
983836af83bSDave Taht 		get_slot(q, tb[TCA_NETEM_SLOT]);
984836af83bSDave Taht 
985661b7972Sstephen hemminger 	return ret;
9861da177e4SLinus Torvalds }
9871da177e4SLinus Torvalds 
988e63d7dfdSAlexander Aring static int netem_init(struct Qdisc *sch, struct nlattr *opt,
989e63d7dfdSAlexander Aring 		      struct netlink_ext_ack *extack)
9901da177e4SLinus Torvalds {
9911da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
9921da177e4SLinus Torvalds 	int ret;
9931da177e4SLinus Torvalds 
994634576a1SNikolay Aleksandrov 	qdisc_watchdog_init(&q->watchdog, sch);
995634576a1SNikolay Aleksandrov 
9961da177e4SLinus Torvalds 	if (!opt)
9971da177e4SLinus Torvalds 		return -EINVAL;
9981da177e4SLinus Torvalds 
999661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
10002030721cSAlexander Aring 	ret = netem_change(sch, opt, extack);
100150612537SEric Dumazet 	if (ret)
1002250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
10031da177e4SLinus Torvalds 	return ret;
10041da177e4SLinus Torvalds }
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
10071da177e4SLinus Torvalds {
10081da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10091da177e4SLinus Torvalds 
101059cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
101150612537SEric Dumazet 	if (q->qdisc)
10121da177e4SLinus Torvalds 		qdisc_destroy(q->qdisc);
10136373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
10141da177e4SLinus Torvalds }
10151da177e4SLinus Torvalds 
1016661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
1017661b7972Sstephen hemminger 			   struct sk_buff *skb)
1018661b7972Sstephen hemminger {
1019661b7972Sstephen hemminger 	struct nlattr *nest;
1020661b7972Sstephen hemminger 
1021661b7972Sstephen hemminger 	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
1022661b7972Sstephen hemminger 	if (nest == NULL)
1023661b7972Sstephen hemminger 		goto nla_put_failure;
1024661b7972Sstephen hemminger 
1025661b7972Sstephen hemminger 	switch (q->loss_model) {
1026661b7972Sstephen hemminger 	case CLG_RANDOM:
1027661b7972Sstephen hemminger 		/* legacy loss model */
1028661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
1029661b7972Sstephen hemminger 		return 0;	/* no data */
1030661b7972Sstephen hemminger 
1031661b7972Sstephen hemminger 	case CLG_4_STATES: {
1032661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
1033661b7972Sstephen hemminger 			.p13 = q->clg.a1,
1034661b7972Sstephen hemminger 			.p31 = q->clg.a2,
1035661b7972Sstephen hemminger 			.p32 = q->clg.a3,
1036661b7972Sstephen hemminger 			.p14 = q->clg.a4,
1037661b7972Sstephen hemminger 			.p23 = q->clg.a5,
1038661b7972Sstephen hemminger 		};
1039661b7972Sstephen hemminger 
10401b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
10411b34ec43SDavid S. Miller 			goto nla_put_failure;
1042661b7972Sstephen hemminger 		break;
1043661b7972Sstephen hemminger 	}
1044661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
1045661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
1046661b7972Sstephen hemminger 			.p = q->clg.a1,
1047661b7972Sstephen hemminger 			.r = q->clg.a2,
1048661b7972Sstephen hemminger 			.h = q->clg.a3,
1049661b7972Sstephen hemminger 			.k1 = q->clg.a4,
1050661b7972Sstephen hemminger 		};
1051661b7972Sstephen hemminger 
10521b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
10531b34ec43SDavid S. Miller 			goto nla_put_failure;
1054661b7972Sstephen hemminger 		break;
1055661b7972Sstephen hemminger 	}
1056661b7972Sstephen hemminger 	}
1057661b7972Sstephen hemminger 
1058661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
1059661b7972Sstephen hemminger 	return 0;
1060661b7972Sstephen hemminger 
1061661b7972Sstephen hemminger nla_put_failure:
1062661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
1063661b7972Sstephen hemminger 	return -1;
1064661b7972Sstephen hemminger }
1065661b7972Sstephen hemminger 
10661da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
10671da177e4SLinus Torvalds {
10681da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
1069861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
10701da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
10711da177e4SLinus Torvalds 	struct tc_netem_corr cor;
10720dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
1073c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
10747bc0f28cSHagen Paul Pfeifer 	struct tc_netem_rate rate;
1075836af83bSDave Taht 	struct tc_netem_slot slot;
10761da177e4SLinus Torvalds 
	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
			     UINT_MAX);
	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
			    UINT_MAX);
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

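	/* Rates at or above 2^32 bytes/s overflow the legacy 32-bit field:
	 * dump the real value as TCA_NETEM_RATE64 and saturate rate.rate so
	 * that old userspace at least sees "very fast". */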
	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

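	/* Slot configuration is only dumped when one is set; INT_MAX is the
	 * internal "no limit" sentinel, reported to userspace as 0. */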
	if (q->slot_config.min_delay | q->slot_config.max_delay) {
		slot = q->slot_config;
		if (slot.max_packets == INT_MAX)
			slot.max_packets = 0;
		if (slot.max_bytes == INT_MAX)
			slot.max_bytes = 0;
		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
			goto nla_put_failure;
	}

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

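/* netem exposes exactly one pseudo-class (minor 1) that wraps the child
 * qdisc, so the class interface below is mostly boilerplate. */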
static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

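/* Swap in a new child qdisc; qdisc_replace() does the tree locking,
 * queue purging and backlog accounting for us. */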
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

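/* Visit the single class, honouring the skip/count bookkeeping that the
 * walker protocol requires. */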
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.find		=	netem_find,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

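/* Note: .peek is the generic qdisc_peek_dequeued() helper, which
 * dequeues and caches a packet, so a parent that peeks will not see a
 * delayed packet before netem_dequeue() says it is due. */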
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};

static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}

static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}

module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
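/*
 * Illustrative userspace usage (iproute2 "tc" syntax; see tc-netem(8)
 * for the authoritative grammar -- the commands below are examples,
 * not part of this module):
 *
 *   # 100ms +/- 10ms delay, 25% correlated, plus 0.1% random loss:
 *   tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.1%
 *
 *   # Gilbert-Elliott correlated loss, dumped via NETEM_LOSS_GE above:
 *   tc qdisc change dev eth0 root netem loss gemodel 1% 10% 70% 0.1%
 */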