xref: /openbmc/linux/net/sched/sch_netem.c (revision 112f9cb6)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7798b6b19SStephen Hemminger  * 		2 of the License.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
16b7f080cfSAlexey Dobriyan #include <linux/mm.h>
171da177e4SLinus Torvalds #include <linux/module.h>
185a0e3ad6STejun Heo #include <linux/slab.h>
191da177e4SLinus Torvalds #include <linux/types.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/skbuff.h>
2378776d3fSDavid S. Miller #include <linux/vmalloc.h>
241da177e4SLinus Torvalds #include <linux/rtnetlink.h>
2590b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h>
26aec0a40aSEric Dumazet #include <linux/rbtree.h>
271da177e4SLinus Torvalds 
28dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
291da177e4SLinus Torvalds #include <net/pkt_sched.h>
30e4ae004bSEric Dumazet #include <net/inet_ecn.h>
311da177e4SLinus Torvalds 
32250a65f7Sstephen hemminger #define VERSION "1.3"
33eb229c4cSStephen Hemminger 
341da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
351da177e4SLinus Torvalds 	====================================
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
381da177e4SLinus Torvalds 		 Network Emulation Tool"
391da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
401da177e4SLinus Torvalds 
411da177e4SLinus Torvalds 	 ----------------------------------------------------------------
421da177e4SLinus Torvalds 
431da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
441da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
451da177e4SLinus Torvalds 	 of a full-blown network emulator like NISTnet. It can delay
461da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
471da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
481da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
491da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
501da177e4SLinus Torvalds 
511da177e4SLinus Torvalds 	 This qdisc does not do classification; that can be handled by
521da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
531da177e4SLinus Torvalds 	 control either, since that can be handled by using a token
541da177e4SLinus Torvalds 	 bucket or other rate control discipline.
55661b7972Sstephen hemminger 
56661b7972Sstephen hemminger      Correlated Loss Generator models
57661b7972Sstephen hemminger 
58661b7972Sstephen hemminger 	Added generation of correlated loss according to a 4-state Markov
59661b7972Sstephen hemminger 	chain (the General and Intuitive model) and the "Gilbert-Elliot" model.
60661b7972Sstephen hemminger 
61661b7972Sstephen hemminger 	References:
62661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
63661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
64661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
65661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
66661b7972Sstephen hemminger 
67661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
68661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
691da177e4SLinus Torvalds */
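/* Example configurations (illustrative only; "eth0" and all numbers are
 * placeholders, commands follow the tc-netem(8) syntax):
 *
 *	# 100ms delay, 10ms jitter with 25% correlation, plus 0.3% random loss
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.3%
 *
 *	# delay drawn from the pre-built "normal" distribution table
 *	tc qdisc add dev eth0 root netem delay 100ms 20ms distribution normal
 *
 *	# bandwidth control layered below netem instead of built in
 *	tc qdisc add dev eth0 root handle 1:0 netem delay 50ms
 *	tc qdisc add dev eth0 parent 1:1 handle 10: tbf rate 256kbit buffer 1600 limit 3000
 */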
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds struct netem_sched_data {
72aec0a40aSEric Dumazet 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
73aec0a40aSEric Dumazet 	struct rb_root t_root;
7450612537SEric Dumazet 
7550612537SEric Dumazet 	/* optional qdisc for classful handling (NULL at netem init) */
761da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
7750612537SEric Dumazet 
7859cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
791da177e4SLinus Torvalds 
80112f9cb6SDave Taht 	s64 latency;
81112f9cb6SDave Taht 	s64 jitter;
82b407621cSStephen Hemminger 
831da177e4SLinus Torvalds 	u32 loss;
84e4ae004bSEric Dumazet 	u32 ecn;
851da177e4SLinus Torvalds 	u32 limit;
861da177e4SLinus Torvalds 	u32 counter;
871da177e4SLinus Torvalds 	u32 gap;
881da177e4SLinus Torvalds 	u32 duplicate;
890dca51d3SStephen Hemminger 	u32 reorder;
90c865e5d9SStephen Hemminger 	u32 corrupt;
916a031f67SYang Yingliang 	u64 rate;
9290b41a1cSHagen Paul Pfeifer 	s32 packet_overhead;
9390b41a1cSHagen Paul Pfeifer 	u32 cell_size;
94809fa972SHannes Frederic Sowa 	struct reciprocal_value cell_size_reciprocal;
9590b41a1cSHagen Paul Pfeifer 	s32 cell_overhead;
961da177e4SLinus Torvalds 
971da177e4SLinus Torvalds 	struct crndstate {
98b407621cSStephen Hemminger 		u32 last;
99b407621cSStephen Hemminger 		u32 rho;
100c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
1011da177e4SLinus Torvalds 
1021da177e4SLinus Torvalds 	struct disttable {
1031da177e4SLinus Torvalds 		u32  size;
1041da177e4SLinus Torvalds 		s16 table[0];
1051da177e4SLinus Torvalds 	} *delay_dist;
106661b7972Sstephen hemminger 
107661b7972Sstephen hemminger 	enum  {
108661b7972Sstephen hemminger 		CLG_RANDOM,
109661b7972Sstephen hemminger 		CLG_4_STATES,
110661b7972Sstephen hemminger 		CLG_GILB_ELL,
111661b7972Sstephen hemminger 	} loss_model;
112661b7972Sstephen hemminger 
113a6e2fe17SYang Yingliang 	enum {
114a6e2fe17SYang Yingliang 		TX_IN_GAP_PERIOD = 1,
115a6e2fe17SYang Yingliang 		TX_IN_BURST_PERIOD,
116a6e2fe17SYang Yingliang 		LOST_IN_GAP_PERIOD,
117a6e2fe17SYang Yingliang 		LOST_IN_BURST_PERIOD,
118a6e2fe17SYang Yingliang 	} _4_state_model;
119a6e2fe17SYang Yingliang 
120c045a734SYang Yingliang 	enum {
121c045a734SYang Yingliang 		GOOD_STATE = 1,
122c045a734SYang Yingliang 		BAD_STATE,
123c045a734SYang Yingliang 	} GE_state_model;
124c045a734SYang Yingliang 
125661b7972Sstephen hemminger 	/* Correlated Loss Generation models */
126661b7972Sstephen hemminger 	struct clgstate {
127661b7972Sstephen hemminger 		/* state of the Markov chain */
128661b7972Sstephen hemminger 		u8 state;
129661b7972Sstephen hemminger 
130661b7972Sstephen hemminger 		/* 4-states and Gilbert-Elliot models */
131661b7972Sstephen hemminger 		u32 a1;	/* p13 for 4-states or p for GE */
132661b7972Sstephen hemminger 		u32 a2;	/* p31 for 4-states or r for GE */
133661b7972Sstephen hemminger 		u32 a3;	/* p32 for 4-states or h for GE */
134661b7972Sstephen hemminger 		u32 a4;	/* p14 for 4-states or 1-k for GE */
135661b7972Sstephen hemminger 		u32 a5; /* p23 used only in 4-states */
136661b7972Sstephen hemminger 	} clg;
137661b7972Sstephen hemminger 
1381da177e4SLinus Torvalds };
1391da177e4SLinus Torvalds 
14050612537SEric Dumazet /* Time stamp put into socket buffer control block
14150612537SEric Dumazet  * Only valid when skbs are in our internal t(ime)fifo queue.
14256b17425SEric Dumazet  *
14356b17425SEric Dumazet  * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
14456b17425SEric Dumazet  * and skb->next & skb->prev are scratch space for a qdisc,
14556b17425SEric Dumazet  * we save the skb->tstamp value in skb->cb[] before destroying it.
14650612537SEric Dumazet  */
1471da177e4SLinus Torvalds struct netem_skb_cb {
148112f9cb6SDave Taht 	u64	        time_to_send;
1491da177e4SLinus Torvalds };
1501da177e4SLinus Torvalds 
1515f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
1525f86173bSJussi Kivilinna {
153aec0a40aSEric Dumazet 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
15416bda13dSDavid S. Miller 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
155175f9c1bSJussi Kivilinna 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
1565f86173bSJussi Kivilinna }
1575f86173bSJussi Kivilinna 
1581da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1591da177e4SLinus Torvalds  * Use entropy source for initial seed.
1601da177e4SLinus Torvalds  */
1611da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1621da177e4SLinus Torvalds {
1631da177e4SLinus Torvalds 	state->rho = rho;
16463862b5bSAruna-Hewapathirane 	state->last = prandom_u32();
1651da177e4SLinus Torvalds }
1661da177e4SLinus Torvalds 
1671da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1681da177e4SLinus Torvalds  * Next number depends on last value.
1691da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1701da177e4SLinus Torvalds  */
171b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1721da177e4SLinus Torvalds {
1731da177e4SLinus Torvalds 	u64 value, rho;
1741da177e4SLinus Torvalds 	unsigned long answer;
1751da177e4SLinus Torvalds 
176bb2f8cc0SStephen Hemminger 	if (state->rho == 0)	/* no correlation */
17763862b5bSAruna-Hewapathirane 		return prandom_u32();
1781da177e4SLinus Torvalds 
17963862b5bSAruna-Hewapathirane 	value = prandom_u32();
1801da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1811da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1821da177e4SLinus Torvalds 	state->last = answer;
1831da177e4SLinus Torvalds 	return answer;
1841da177e4SLinus Torvalds }
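/* Worked example (illustrative numbers): with rho = 0x80000000, roughly the
 * value a 50% correlation is scaled to, the blend above computes
 * approximately answer = value/2 + last/2, pulling each new value halfway
 * toward the previous one; rho == 0 degenerates to plain prandom_u32() and
 * rho close to 2^32 keeps returning (almost) the previous value.
 */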
1851da177e4SLinus Torvalds 
186661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
187661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
188661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
189661b7972Sstephen hemminger  */
190661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
191661b7972Sstephen hemminger {
192661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
19363862b5bSAruna-Hewapathirane 	u32 rnd = prandom_u32();
194661b7972Sstephen hemminger 
195661b7972Sstephen hemminger 	/*
19625985edcSLucas De Marchi 	 * Makes a comparison between rnd and the transition
197661b7972Sstephen hemminger 	 * probabilities outgoing from the current state, then decides the
198661b7972Sstephen hemminger 	 * next state and if the next packet has to be transmitted or lost.
199661b7972Sstephen hemminger 	 * The four states correspond to:
200a6e2fe17SYang Yingliang 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
201a6e2fe17SYang Yingliang 	 *   LOST_IN_BURST_PERIOD => isolated losses within a gap period
202a6e2fe17SYang Yingliang 	 *   LOST_IN_GAP_PERIOD => lost packets within a burst period
203a6e2fe17SYang Yingliang 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
204661b7972Sstephen hemminger 	 */
205661b7972Sstephen hemminger 	switch (clg->state) {
206a6e2fe17SYang Yingliang 	case TX_IN_GAP_PERIOD:
207661b7972Sstephen hemminger 		if (rnd < clg->a4) {
208a6e2fe17SYang Yingliang 			clg->state = LOST_IN_BURST_PERIOD;
209661b7972Sstephen hemminger 			return true;
210ab6c27beSstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
211a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
212661b7972Sstephen hemminger 			return true;
213a6e2fe17SYang Yingliang 		} else if (clg->a1 + clg->a4 < rnd) {
214a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
215a6e2fe17SYang Yingliang 		}
216661b7972Sstephen hemminger 
217661b7972Sstephen hemminger 		break;
218a6e2fe17SYang Yingliang 	case TX_IN_BURST_PERIOD:
219661b7972Sstephen hemminger 		if (rnd < clg->a5) {
220a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
221661b7972Sstephen hemminger 			return true;
222a6e2fe17SYang Yingliang 		} else {
223a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
224a6e2fe17SYang Yingliang 		}
225661b7972Sstephen hemminger 
226661b7972Sstephen hemminger 		break;
227a6e2fe17SYang Yingliang 	case LOST_IN_GAP_PERIOD:
228661b7972Sstephen hemminger 		if (rnd < clg->a3)
229a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
230661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
231a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
232661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
233a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
234661b7972Sstephen hemminger 			return true;
235661b7972Sstephen hemminger 		}
236661b7972Sstephen hemminger 		break;
237a6e2fe17SYang Yingliang 	case LOST_IN_BURST_PERIOD:
238a6e2fe17SYang Yingliang 		clg->state = TX_IN_GAP_PERIOD;
239661b7972Sstephen hemminger 		break;
240661b7972Sstephen hemminger 	}
241661b7972Sstephen hemminger 
242661b7972Sstephen hemminger 	return false;
243661b7972Sstephen hemminger }
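/* Illustrative parameter sketch (made-up values, each scaled to the full
 * 0..2^32-1 range that rnd above is drawn from): with a1 = p13 = 1%,
 * a4 = p14 = 0.1%, a2 = p31 = 90%, a3 = p32 = 5% and a5 = p23 = 10%,
 * the chain mostly sits in TX_IN_GAP_PERIOD with rare isolated drops;
 * a p13 event moves it into the LOST_IN_GAP_PERIOD/TX_IN_BURST_PERIOD pair,
 * producing a short loss burst until p31 pulls it back to the gap period.
 * From user space these parameters arrive as a struct tc_netem_gimodel
 * nested in the TCA_NETEM_LOSS attribute (see get_loss_clg() below).
 */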
244661b7972Sstephen hemminger 
245661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
246661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
247661b7972Sstephen hemminger  * its special cases (Gilbert or Simple Gilbert)
248661b7972Sstephen hemminger  *
24925985edcSLucas De Marchi  * Compares a random number with the transition probabilities outgoing
250661b7972Sstephen hemminger  * from the current state, then decides the next state. A second random
25125985edcSLucas De Marchi  * number is extracted and compared with the loss probability of the
252661b7972Sstephen hemminger  * current state to decide whether the next packet will be transmitted
253661b7972Sstephen hemminger  * or lost.
254661b7972Sstephen hemminger  */
255661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
256661b7972Sstephen hemminger {
257661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
258661b7972Sstephen hemminger 
259661b7972Sstephen hemminger 	switch (clg->state) {
260c045a734SYang Yingliang 	case GOOD_STATE:
26163862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a1)
262c045a734SYang Yingliang 			clg->state = BAD_STATE;
26363862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a4)
264661b7972Sstephen hemminger 			return true;
2657c2781faSstephen hemminger 		break;
266c045a734SYang Yingliang 	case BAD_STATE:
26763862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a2)
268c045a734SYang Yingliang 			clg->state = GOOD_STATE;
26963862b5bSAruna-Hewapathirane 		if (prandom_u32() > clg->a3)
270661b7972Sstephen hemminger 			return true;
271661b7972Sstephen hemminger 	}
272661b7972Sstephen hemminger 
273661b7972Sstephen hemminger 	return false;
274661b7972Sstephen hemminger }
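/* Worked example (made-up numbers): a1 = p = 1% and a2 = r = 20% give mean
 * sojourn times of about 100 packets in GOOD_STATE and 5 packets in
 * BAD_STATE; a4 (1-k) is the per-packet loss probability while good and
 * a3 (h) the transmission probability while bad, so a3 == 0 and a4 == 0
 * reproduce the "Simple Gilbert" special case where every packet in the
 * bad state is lost and none in the good state.
 */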
275661b7972Sstephen hemminger 
276661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
277661b7972Sstephen hemminger {
278661b7972Sstephen hemminger 	switch (q->loss_model) {
279661b7972Sstephen hemminger 	case CLG_RANDOM:
280661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
281661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
282661b7972Sstephen hemminger 
283661b7972Sstephen hemminger 	case CLG_4_STATES:
284661b7972Sstephen hemminger 		/* 4-state loss model algorithm (also used for the GI model):
285661b7972Sstephen hemminger 		* ask the 4-state Markov loss generator whether the current
286661b7972Sstephen hemminger 		* packet should be dropped (true means drop).
287661b7972Sstephen hemminger 		*/
289661b7972Sstephen hemminger 		return loss_4state(q);
290661b7972Sstephen hemminger 
291661b7972Sstephen hemminger 	case CLG_GILB_ELL:
292661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm:
293661b7972Sstephen hemminger 		* ask the Gilbert-Elliot loss generator whether the current
294661b7972Sstephen hemminger 		* packet should be dropped (true means drop).
295661b7972Sstephen hemminger 		*/
297661b7972Sstephen hemminger 		return loss_gilb_ell(q);
298661b7972Sstephen hemminger 	}
299661b7972Sstephen hemminger 
300661b7972Sstephen hemminger 	return false;	/* not reached */
301661b7972Sstephen hemminger }
302661b7972Sstephen hemminger 
303661b7972Sstephen hemminger 
3041da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
3051da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
3061da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
3071da177e4SLinus Torvalds  */
308112f9cb6SDave Taht static s64 tabledist(s64 mu, s64 sigma,
309b407621cSStephen Hemminger 		     struct crndstate *state,
310b407621cSStephen Hemminger 			 const struct disttable *dist)
3111da177e4SLinus Torvalds {
312112f9cb6SDave Taht 	s64 x;
313b407621cSStephen Hemminger 	long t;
314b407621cSStephen Hemminger 	u32 rnd;
3151da177e4SLinus Torvalds 
3161da177e4SLinus Torvalds 	if (sigma == 0)
3171da177e4SLinus Torvalds 		return mu;
3181da177e4SLinus Torvalds 
3191da177e4SLinus Torvalds 	rnd = get_crandom(state);
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds 	/* default uniform distribution */
3221da177e4SLinus Torvalds 	if (dist == NULL)
3231da177e4SLinus Torvalds 		return (rnd % (2*sigma)) - sigma + mu;
3241da177e4SLinus Torvalds 
3251da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
3261da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
3271da177e4SLinus Torvalds 	if (x >= 0)
3281da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
3291da177e4SLinus Torvalds 	else
3301da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
3311da177e4SLinus Torvalds 
3321da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3331da177e4SLinus Torvalds }
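/* Worked example (illustrative numbers): with mu = 100ms, sigma = 10ms (both
 * in nanoseconds here) and a table entry t = 8192 == NETEM_DIST_SCALE, i.e.
 * one standard deviation, the expression above reduces to
 *	sigma * t / NETEM_DIST_SCALE + mu == mu + sigma == 110ms,
 * with the multiplication split in two halves and NETEM_DIST_SCALE/2 added
 * to x for rounding to the nearest value.
 */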
3341da177e4SLinus Torvalds 
335112f9cb6SDave Taht static u64 packet_len_2_sched_time(unsigned int len,
336112f9cb6SDave Taht 				   struct netem_sched_data *q)
3377bc0f28cSHagen Paul Pfeifer {
338112f9cb6SDave Taht 	u64 offset;
33990b41a1cSHagen Paul Pfeifer 	len += q->packet_overhead;
34090b41a1cSHagen Paul Pfeifer 
34190b41a1cSHagen Paul Pfeifer 	if (q->cell_size) {
34290b41a1cSHagen Paul Pfeifer 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
34390b41a1cSHagen Paul Pfeifer 
34490b41a1cSHagen Paul Pfeifer 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
34590b41a1cSHagen Paul Pfeifer 			cells++;
34690b41a1cSHagen Paul Pfeifer 		len = cells * (q->cell_size + q->cell_overhead);
34790b41a1cSHagen Paul Pfeifer 	}
348112f9cb6SDave Taht 	offset = (u64)len * NSEC_PER_SEC;
349112f9cb6SDave Taht 	do_div(offset, q->rate);
350112f9cb6SDave Taht 	return offset;
3517bc0f28cSHagen Paul Pfeifer }
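/* Worked example (illustrative numbers, rate is in bytes per second):
 * with rate = 125000 (1 Mbit/s), packet_overhead = 0 and no cell framing,
 * a 1500 byte packet gets offset = 1500 * 10^9 / 125000 = 12ms of
 * serialization delay.  With cell_size = 48 and cell_overhead = 5 (roughly
 * ATM framing), the same packet is first rounded up to 32 cells, i.e.
 * 32 * 53 = 1696 bytes, before the division.
 */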
3527bc0f28cSHagen Paul Pfeifer 
353ff704050Sstephen hemminger static void tfifo_reset(struct Qdisc *sch)
354ff704050Sstephen hemminger {
355ff704050Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3563aa605f2SEric Dumazet 	struct rb_node *p = rb_first(&q->t_root);
357ff704050Sstephen hemminger 
3583aa605f2SEric Dumazet 	while (p) {
35918a4c0eaSEric Dumazet 		struct sk_buff *skb = rb_to_skb(p);
360ff704050Sstephen hemminger 
3613aa605f2SEric Dumazet 		p = rb_next(p);
3623aa605f2SEric Dumazet 		rb_erase(&skb->rbnode, &q->t_root);
3632f08a9a1SEric Dumazet 		rtnl_kfree_skbs(skb, skb);
364ff704050Sstephen hemminger 	}
365ff704050Sstephen hemminger }
366ff704050Sstephen hemminger 
367960fb66eSEric Dumazet static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
36850612537SEric Dumazet {
369aec0a40aSEric Dumazet 	struct netem_sched_data *q = qdisc_priv(sch);
370112f9cb6SDave Taht 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
371aec0a40aSEric Dumazet 	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
37250612537SEric Dumazet 
373aec0a40aSEric Dumazet 	while (*p) {
374aec0a40aSEric Dumazet 		struct sk_buff *skb;
37550612537SEric Dumazet 
376aec0a40aSEric Dumazet 		parent = *p;
37718a4c0eaSEric Dumazet 		skb = rb_to_skb(parent);
37850612537SEric Dumazet 		if (tnext >= netem_skb_cb(skb)->time_to_send)
379aec0a40aSEric Dumazet 			p = &parent->rb_right;
380aec0a40aSEric Dumazet 		else
381aec0a40aSEric Dumazet 			p = &parent->rb_left;
38250612537SEric Dumazet 	}
38356b17425SEric Dumazet 	rb_link_node(&nskb->rbnode, parent, p);
38456b17425SEric Dumazet 	rb_insert_color(&nskb->rbnode, &q->t_root);
385aec0a40aSEric Dumazet 	sch->q.qlen++;
38650612537SEric Dumazet }
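/* Example of the resulting order (arbitrary times): enqueueing packets with
 * time_to_send 10, 30 and then 10 again puts the second "10" in the right
 * subtree of the first (the ">=" comparison above), so an in-order walk
 * starting at rb_first() yields 10, 10, 30 and packets sharing a timestamp
 * keep their original FIFO order.
 */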
38750612537SEric Dumazet 
3886071bd1aSNeil Horman /* netem can't properly corrupt a megapacket (like we get from GSO), so when
3896071bd1aSNeil Horman  * we statistically choose to corrupt one, we segment it instead, returning
3906071bd1aSNeil Horman  * the first packet to be corrupted, and re-enqueue the remaining frames
3916071bd1aSNeil Horman  */
392520ac30fSEric Dumazet static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
393520ac30fSEric Dumazet 				     struct sk_buff **to_free)
3946071bd1aSNeil Horman {
3956071bd1aSNeil Horman 	struct sk_buff *segs;
3966071bd1aSNeil Horman 	netdev_features_t features = netif_skb_features(skb);
3976071bd1aSNeil Horman 
3986071bd1aSNeil Horman 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
3996071bd1aSNeil Horman 
4006071bd1aSNeil Horman 	if (IS_ERR_OR_NULL(segs)) {
401520ac30fSEric Dumazet 		qdisc_drop(skb, sch, to_free);
4026071bd1aSNeil Horman 		return NULL;
4036071bd1aSNeil Horman 	}
4046071bd1aSNeil Horman 	consume_skb(skb);
4056071bd1aSNeil Horman 	return segs;
4066071bd1aSNeil Horman }
4076071bd1aSNeil Horman 
40848da34b7SFlorian Westphal static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
40948da34b7SFlorian Westphal {
41048da34b7SFlorian Westphal 	skb->next = qh->head;
41148da34b7SFlorian Westphal 
41248da34b7SFlorian Westphal 	if (!qh->head)
41348da34b7SFlorian Westphal 		qh->tail = skb;
41448da34b7SFlorian Westphal 	qh->head = skb;
41548da34b7SFlorian Westphal 	qh->qlen++;
41648da34b7SFlorian Westphal }
41748da34b7SFlorian Westphal 
4180afb51e7SStephen Hemminger /*
4190afb51e7SStephen Hemminger  * Insert one skb into qdisc.
4200afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
4210afb51e7SStephen Hemminger  * 	NET_XMIT_DROP: queue length didn't change.
4220afb51e7SStephen Hemminger  *      NET_XMIT_SUCCESS: one skb was queued.
4230afb51e7SStephen Hemminger  */
424520ac30fSEric Dumazet static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
425520ac30fSEric Dumazet 			 struct sk_buff **to_free)
4261da177e4SLinus Torvalds {
4271da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
42889e1df74SGuillaume Chazarain 	/* We don't fill cb now as skb_unshare() may invalidate it */
42989e1df74SGuillaume Chazarain 	struct netem_skb_cb *cb;
4300afb51e7SStephen Hemminger 	struct sk_buff *skb2;
4316071bd1aSNeil Horman 	struct sk_buff *segs = NULL;
4326071bd1aSNeil Horman 	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
4336071bd1aSNeil Horman 	int nb = 0;
4340afb51e7SStephen Hemminger 	int count = 1;
4356071bd1aSNeil Horman 	int rc = NET_XMIT_SUCCESS;
4361da177e4SLinus Torvalds 
4370afb51e7SStephen Hemminger 	/* Random duplication */
4380afb51e7SStephen Hemminger 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
4390afb51e7SStephen Hemminger 		++count;
4400afb51e7SStephen Hemminger 
441661b7972Sstephen hemminger 	/* Drop packet? */
442e4ae004bSEric Dumazet 	if (loss_event(q)) {
443e4ae004bSEric Dumazet 		if (q->ecn && INET_ECN_set_ce(skb))
44425331d6cSJohn Fastabend 			qdisc_qstats_drop(sch); /* mark packet */
445e4ae004bSEric Dumazet 		else
4460afb51e7SStephen Hemminger 			--count;
447e4ae004bSEric Dumazet 	}
4480afb51e7SStephen Hemminger 	if (count == 0) {
44925331d6cSJohn Fastabend 		qdisc_qstats_drop(sch);
450520ac30fSEric Dumazet 		__qdisc_drop(skb, to_free);
451c27f339aSJarek Poplawski 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
4521da177e4SLinus Torvalds 	}
4531da177e4SLinus Torvalds 
4545a308f40SEric Dumazet 	/* If a delay is expected, orphan the skb. (orphaning usually takes
4555a308f40SEric Dumazet 	 * place at TX completion time, so _before_ the link transit delay)
4565a308f40SEric Dumazet 	 */
4575080f39eSNik Unger 	if (q->latency || q->jitter || q->rate)
458f2f872f9SEric Dumazet 		skb_orphan_partial(skb);
4594e8a5201SDavid S. Miller 
4600afb51e7SStephen Hemminger 	/*
4610afb51e7SStephen Hemminger 	 * If we need to duplicate the packet, then re-insert it at the top of
4620afb51e7SStephen Hemminger 	 * the qdisc tree, since the parent queuer expects that only one
463d5d75cd6SStephen Hemminger 	 * skb will be queued.
464d5d75cd6SStephen Hemminger 	 */
4650afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
4667698b4fcSDavid S. Miller 		struct Qdisc *rootq = qdisc_root(sch);
4670afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
468d5d75cd6SStephen Hemminger 
469b396cca6SEric Dumazet 		q->duplicate = 0;
470520ac30fSEric Dumazet 		rootq->enqueue(skb2, rootq, to_free);
4710afb51e7SStephen Hemminger 		q->duplicate = dupsave;
4721da177e4SLinus Torvalds 	}
4731da177e4SLinus Torvalds 
474c865e5d9SStephen Hemminger 	/*
475c865e5d9SStephen Hemminger 	 * Randomized packet corruption.
476c865e5d9SStephen Hemminger 	 * Make a copy if needed since we are modifying the data.
477c865e5d9SStephen Hemminger 	 * If the packet is going to be hardware checksummed, then
478c865e5d9SStephen Hemminger 	 * do it now in software before we mangle it.
479c865e5d9SStephen Hemminger 	 */
480c865e5d9SStephen Hemminger 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
4816071bd1aSNeil Horman 		if (skb_is_gso(skb)) {
482520ac30fSEric Dumazet 			segs = netem_segment(skb, sch, to_free);
4836071bd1aSNeil Horman 			if (!segs)
4846071bd1aSNeil Horman 				return NET_XMIT_DROP;
4856071bd1aSNeil Horman 		} else {
4866071bd1aSNeil Horman 			segs = skb;
4876071bd1aSNeil Horman 		}
4886071bd1aSNeil Horman 
4896071bd1aSNeil Horman 		skb = segs;
4906071bd1aSNeil Horman 		segs = segs->next;
4916071bd1aSNeil Horman 
4928a6e9c67SEric Dumazet 		skb = skb_unshare(skb, GFP_ATOMIC);
4938a6e9c67SEric Dumazet 		if (unlikely(!skb)) {
4948a6e9c67SEric Dumazet 			qdisc_qstats_drop(sch);
4958a6e9c67SEric Dumazet 			goto finish_segs;
4968a6e9c67SEric Dumazet 		}
4978a6e9c67SEric Dumazet 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
4988a6e9c67SEric Dumazet 		    skb_checksum_help(skb)) {
4998a6e9c67SEric Dumazet 			qdisc_drop(skb, sch, to_free);
5006071bd1aSNeil Horman 			goto finish_segs;
5016071bd1aSNeil Horman 		}
502c865e5d9SStephen Hemminger 
50363862b5bSAruna-Hewapathirane 		skb->data[prandom_u32() % skb_headlen(skb)] ^=
50463862b5bSAruna-Hewapathirane 			1<<(prandom_u32() % 8);
505c865e5d9SStephen Hemminger 	}
506c865e5d9SStephen Hemminger 
50797d0678fSFlorian Westphal 	if (unlikely(sch->q.qlen >= sch->limit))
508520ac30fSEric Dumazet 		return qdisc_drop(skb, sch, to_free);
509960fb66eSEric Dumazet 
51025331d6cSJohn Fastabend 	qdisc_qstats_backlog_inc(sch, skb);
511960fb66eSEric Dumazet 
5125f86173bSJussi Kivilinna 	cb = netem_skb_cb(skb);
513f64f9e71SJoe Perches 	if (q->gap == 0 ||		/* not doing reordering */
514a42b4799SVijay Subramanian 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
515f64f9e71SJoe Perches 	    q->reorder < get_crandom(&q->reorder_cor)) {
516112f9cb6SDave Taht 		u64 now;
517112f9cb6SDave Taht 		s64 delay;
51807aaa115SStephen Hemminger 
51907aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
52007aaa115SStephen Hemminger 				  &q->delay_cor, q->delay_dist);
52107aaa115SStephen Hemminger 
522112f9cb6SDave Taht 		now = ktime_get_ns();
5237bc0f28cSHagen Paul Pfeifer 
5247bc0f28cSHagen Paul Pfeifer 		if (q->rate) {
5255080f39eSNik Unger 			struct netem_skb_cb *last = NULL;
5267bc0f28cSHagen Paul Pfeifer 
5275080f39eSNik Unger 			if (sch->q.tail)
5285080f39eSNik Unger 				last = netem_skb_cb(sch->q.tail);
5295080f39eSNik Unger 			if (q->t_root.rb_node) {
5305080f39eSNik Unger 				struct sk_buff *t_skb;
5315080f39eSNik Unger 				struct netem_skb_cb *t_last;
5325080f39eSNik Unger 
53318a4c0eaSEric Dumazet 				t_skb = skb_rb_last(&q->t_root);
5345080f39eSNik Unger 				t_last = netem_skb_cb(t_skb);
5355080f39eSNik Unger 				if (!last ||
5365080f39eSNik Unger 				    t_last->time_to_send > last->time_to_send) {
5375080f39eSNik Unger 					last = t_last;
5385080f39eSNik Unger 				}
5395080f39eSNik Unger 			}
5405080f39eSNik Unger 
541aec0a40aSEric Dumazet 			if (last) {
5427bc0f28cSHagen Paul Pfeifer 				/*
543a13d3104SJohannes Naab 				 * The last queued packet is the reference point
544a13d3104SJohannes Naab 				 * (now); subtract the time it still has to wait
5457bc0f28cSHagen Paul Pfeifer 				 * from this packet's delay.
5467bc0f28cSHagen Paul Pfeifer 				 */
5475080f39eSNik Unger 				delay -= last->time_to_send - now;
548112f9cb6SDave Taht 				delay = max_t(s64, 0, delay);
5495080f39eSNik Unger 				now = last->time_to_send;
5507bc0f28cSHagen Paul Pfeifer 			}
551a13d3104SJohannes Naab 
5528cfd88d6SYang Yingliang 			delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
5537bc0f28cSHagen Paul Pfeifer 		}
5547bc0f28cSHagen Paul Pfeifer 
5557c59e25fSPatrick McHardy 		cb->time_to_send = now + delay;
5561da177e4SLinus Torvalds 		++q->counter;
557960fb66eSEric Dumazet 		tfifo_enqueue(skb, sch);
5581da177e4SLinus Torvalds 	} else {
5590dca51d3SStephen Hemminger 		/*
5600dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
5610dca51d3SStephen Hemminger 		 * of the queue.
5620dca51d3SStephen Hemminger 		 */
563112f9cb6SDave Taht 		cb->time_to_send = ktime_get_ns();
5640dca51d3SStephen Hemminger 		q->counter = 0;
5658ba25dadSJarek Poplawski 
56648da34b7SFlorian Westphal 		netem_enqueue_skb_head(&sch->q, skb);
567eb101924SHagen Paul Pfeifer 		sch->qstats.requeues++;
568378a2f09SJarek Poplawski 	}
5691da177e4SLinus Torvalds 
5706071bd1aSNeil Horman finish_segs:
5716071bd1aSNeil Horman 	if (segs) {
5726071bd1aSNeil Horman 		while (segs) {
5736071bd1aSNeil Horman 			skb2 = segs->next;
5746071bd1aSNeil Horman 			segs->next = NULL;
5756071bd1aSNeil Horman 			qdisc_skb_cb(segs)->pkt_len = segs->len;
5766071bd1aSNeil Horman 			last_len = segs->len;
577520ac30fSEric Dumazet 			rc = qdisc_enqueue(segs, sch, to_free);
5786071bd1aSNeil Horman 			if (rc != NET_XMIT_SUCCESS) {
5796071bd1aSNeil Horman 				if (net_xmit_drop_count(rc))
5806071bd1aSNeil Horman 					qdisc_qstats_drop(sch);
5816071bd1aSNeil Horman 			} else {
5826071bd1aSNeil Horman 				nb++;
5836071bd1aSNeil Horman 				len += last_len;
5846071bd1aSNeil Horman 			}
5856071bd1aSNeil Horman 			segs = skb2;
5866071bd1aSNeil Horman 		}
5876071bd1aSNeil Horman 		sch->q.qlen += nb;
5886071bd1aSNeil Horman 		if (nb > 1)
5896071bd1aSNeil Horman 			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
5906071bd1aSNeil Horman 	}
59110f6dfcfSstephen hemminger 	return NET_XMIT_SUCCESS;
5921da177e4SLinus Torvalds }
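/* Reordering example (illustrative, "eth0" is a placeholder):
 *	tc qdisc add dev eth0 root netem delay 10ms reorder 25% 50% gap 5
 * With this setup a packet that already has at least gap-1 = 4 delayed
 * predecessors and passes the 25% (50% correlated) check takes the else
 * branch above: it is queued at the head with time_to_send = now and so
 * overtakes the packets still waiting out their 10ms delay.
 */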
5931da177e4SLinus Torvalds 
5941da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
5951da177e4SLinus Torvalds {
5961da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5971da177e4SLinus Torvalds 	struct sk_buff *skb;
598aec0a40aSEric Dumazet 	struct rb_node *p;
5991da177e4SLinus Torvalds 
60050612537SEric Dumazet tfifo_dequeue:
601ed760cb8SFlorian Westphal 	skb = __qdisc_dequeue_head(&sch->q);
602771018e7SStephen Hemminger 	if (skb) {
60325331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
6040ad2a836SBeshay, Joseph deliver:
605aec0a40aSEric Dumazet 		qdisc_bstats_update(sch, skb);
606aec0a40aSEric Dumazet 		return skb;
607aec0a40aSEric Dumazet 	}
608aec0a40aSEric Dumazet 	p = rb_first(&q->t_root);
609aec0a40aSEric Dumazet 	if (p) {
610112f9cb6SDave Taht 		u64 time_to_send;
61136b7bfe0SEric Dumazet 
61218a4c0eaSEric Dumazet 		skb = rb_to_skb(p);
6130f9f32acSStephen Hemminger 
6140f9f32acSStephen Hemminger 		/* has this packet's send time been reached yet? */
61536b7bfe0SEric Dumazet 		time_to_send = netem_skb_cb(skb)->time_to_send;
616112f9cb6SDave Taht 		if (time_to_send <= ktime_get_ns()) {
617aec0a40aSEric Dumazet 			rb_erase(p, &q->t_root);
618aec0a40aSEric Dumazet 
619aec0a40aSEric Dumazet 			sch->q.qlen--;
6200ad2a836SBeshay, Joseph 			qdisc_qstats_backlog_dec(sch, skb);
621aec0a40aSEric Dumazet 			skb->next = NULL;
622aec0a40aSEric Dumazet 			skb->prev = NULL;
623bffa72cfSEric Dumazet 			/* skb->dev shares skb->rbnode area,
624bffa72cfSEric Dumazet 			 * we need to restore its value.
625bffa72cfSEric Dumazet 			 */
626bffa72cfSEric Dumazet 			skb->dev = qdisc_dev(sch);
62703c05f0dSJarek Poplawski 
6288caf1539SJarek Poplawski #ifdef CONFIG_NET_CLS_ACT
6298caf1539SJarek Poplawski 			/*
6308caf1539SJarek Poplawski 			 * If it's at ingress let's pretend the delay is
6318caf1539SJarek Poplawski 			 * from the network (tstamp will be updated).
6328caf1539SJarek Poplawski 			 */
633bc31c905SWillem de Bruijn 			if (skb->tc_redirected && skb->tc_from_ingress)
6342456e855SThomas Gleixner 				skb->tstamp = 0;
6358caf1539SJarek Poplawski #endif
63610f6dfcfSstephen hemminger 
63750612537SEric Dumazet 			if (q->qdisc) {
63821de12eeSEric Dumazet 				unsigned int pkt_len = qdisc_pkt_len(skb);
639520ac30fSEric Dumazet 				struct sk_buff *to_free = NULL;
640520ac30fSEric Dumazet 				int err;
64150612537SEric Dumazet 
642520ac30fSEric Dumazet 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
643520ac30fSEric Dumazet 				kfree_skb_list(to_free);
64421de12eeSEric Dumazet 				if (err != NET_XMIT_SUCCESS &&
64521de12eeSEric Dumazet 				    net_xmit_drop_count(err)) {
64625331d6cSJohn Fastabend 					qdisc_qstats_drop(sch);
6472ccccf5fSWANG Cong 					qdisc_tree_reduce_backlog(sch, 1,
64821de12eeSEric Dumazet 								  pkt_len);
64950612537SEric Dumazet 				}
65050612537SEric Dumazet 				goto tfifo_dequeue;
65150612537SEric Dumazet 			}
652aec0a40aSEric Dumazet 			goto deliver;
65311274e5aSStephen Hemminger 		}
65407aaa115SStephen Hemminger 
65550612537SEric Dumazet 		if (q->qdisc) {
65650612537SEric Dumazet 			skb = q->qdisc->ops->dequeue(q->qdisc);
65750612537SEric Dumazet 			if (skb)
65850612537SEric Dumazet 				goto deliver;
65950612537SEric Dumazet 		}
660112f9cb6SDave Taht 		qdisc_watchdog_schedule_ns(&q->watchdog, time_to_send);
6610f9f32acSStephen Hemminger 	}
6620f9f32acSStephen Hemminger 
66350612537SEric Dumazet 	if (q->qdisc) {
66450612537SEric Dumazet 		skb = q->qdisc->ops->dequeue(q->qdisc);
66550612537SEric Dumazet 		if (skb)
66650612537SEric Dumazet 			goto deliver;
66750612537SEric Dumazet 	}
6680f9f32acSStephen Hemminger 	return NULL;
6691da177e4SLinus Torvalds }
6701da177e4SLinus Torvalds 
6711da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
6721da177e4SLinus Torvalds {
6731da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6741da177e4SLinus Torvalds 
67550612537SEric Dumazet 	qdisc_reset_queue(sch);
676ff704050Sstephen hemminger 	tfifo_reset(sch);
67750612537SEric Dumazet 	if (q->qdisc)
6781da177e4SLinus Torvalds 		qdisc_reset(q->qdisc);
67959cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
6801da177e4SLinus Torvalds }
6811da177e4SLinus Torvalds 
6826373a9a2Sstephen hemminger static void dist_free(struct disttable *d)
6836373a9a2Sstephen hemminger {
6844cb28970SWANG Cong 	kvfree(d);
6856373a9a2Sstephen hemminger }
6866373a9a2Sstephen hemminger 
6871da177e4SLinus Torvalds /*
6881da177e4SLinus Torvalds  * Distribution data is a variable-size payload containing
6891da177e4SLinus Torvalds  * signed 16-bit values.
6901da177e4SLinus Torvalds  */
6911e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
6921da177e4SLinus Torvalds {
6931da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6946373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
6951e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
6967698b4fcSDavid S. Miller 	spinlock_t *root_lock;
6971da177e4SLinus Torvalds 	struct disttable *d;
6981da177e4SLinus Torvalds 	int i;
6991da177e4SLinus Torvalds 
700df173bdaSstephen hemminger 	if (n > NETEM_DIST_MAX)
7011da177e4SLinus Torvalds 		return -EINVAL;
7021da177e4SLinus Torvalds 
703752ade68SMichal Hocko 	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
7041da177e4SLinus Torvalds 	if (!d)
7051da177e4SLinus Torvalds 		return -ENOMEM;
7061da177e4SLinus Torvalds 
7071da177e4SLinus Torvalds 	d->size = n;
7081da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
7091da177e4SLinus Torvalds 		d->table[i] = data[i];
7101da177e4SLinus Torvalds 
711102396aeSJarek Poplawski 	root_lock = qdisc_root_sleeping_lock(sch);
7127698b4fcSDavid S. Miller 
7137698b4fcSDavid S. Miller 	spin_lock_bh(root_lock);
714bb52c7acSEric Dumazet 	swap(q->delay_dist, d);
7157698b4fcSDavid S. Miller 	spin_unlock_bh(root_lock);
716bb52c7acSEric Dumazet 
717bb52c7acSEric Dumazet 	dist_free(d);
7181da177e4SLinus Torvalds 	return 0;
7191da177e4SLinus Torvalds }
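/* Example of where the table comes from (assuming a stock iproute2 install):
 * "tc qdisc ... netem delay 100ms 10ms distribution normal" makes tc load a
 * pre-generated file such as /usr/lib/tc/normal.dist, a list of signed
 * 16-bit samples scaled so that NETEM_DIST_SCALE is one standard deviation,
 * and send it here as the TCA_NETEM_DELAY_DIST attribute; tabledist() then
 * indexes the copy made above with a correlated pseudo-random value.
 */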
7201da177e4SLinus Torvalds 
72149545a77SYang Yingliang static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
7221da177e4SLinus Torvalds {
7231e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
7241da177e4SLinus Torvalds 
7251da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
7261da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
7271da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
7281da177e4SLinus Torvalds }
7291da177e4SLinus Torvalds 
73049545a77SYang Yingliang static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
7310dca51d3SStephen Hemminger {
7321e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
7330dca51d3SStephen Hemminger 
7340dca51d3SStephen Hemminger 	q->reorder = r->probability;
7350dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
7360dca51d3SStephen Hemminger }
7370dca51d3SStephen Hemminger 
73849545a77SYang Yingliang static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
739c865e5d9SStephen Hemminger {
7401e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
741c865e5d9SStephen Hemminger 
742c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
743c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
744c865e5d9SStephen Hemminger }
745c865e5d9SStephen Hemminger 
74649545a77SYang Yingliang static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
7477bc0f28cSHagen Paul Pfeifer {
7487bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
7497bc0f28cSHagen Paul Pfeifer 
7507bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
75190b41a1cSHagen Paul Pfeifer 	q->packet_overhead = r->packet_overhead;
75290b41a1cSHagen Paul Pfeifer 	q->cell_size = r->cell_size;
753809fa972SHannes Frederic Sowa 	q->cell_overhead = r->cell_overhead;
75490b41a1cSHagen Paul Pfeifer 	if (q->cell_size)
75590b41a1cSHagen Paul Pfeifer 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
756809fa972SHannes Frederic Sowa 	else
757809fa972SHannes Frederic Sowa 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
7587bc0f28cSHagen Paul Pfeifer }
7597bc0f28cSHagen Paul Pfeifer 
76049545a77SYang Yingliang static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
761661b7972Sstephen hemminger {
762661b7972Sstephen hemminger 	const struct nlattr *la;
763661b7972Sstephen hemminger 	int rem;
764661b7972Sstephen hemminger 
765661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
766661b7972Sstephen hemminger 		u16 type = nla_type(la);
767661b7972Sstephen hemminger 
768661b7972Sstephen hemminger 		switch (type) {
769661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
770661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
771661b7972Sstephen hemminger 
7722494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
773661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
774661b7972Sstephen hemminger 				return -EINVAL;
775661b7972Sstephen hemminger 			}
776661b7972Sstephen hemminger 
777661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
778661b7972Sstephen hemminger 
7793fbac2a8SYang Yingliang 			q->clg.state = TX_IN_GAP_PERIOD;
780661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
781661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
782661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
783661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
784661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
785661b7972Sstephen hemminger 			break;
786661b7972Sstephen hemminger 		}
787661b7972Sstephen hemminger 
788661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
789661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
790661b7972Sstephen hemminger 
7912494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
7922494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
793661b7972Sstephen hemminger 				return -EINVAL;
794661b7972Sstephen hemminger 			}
795661b7972Sstephen hemminger 
796661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
7973fbac2a8SYang Yingliang 			q->clg.state = GOOD_STATE;
798661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
799661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
800661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
801661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
802661b7972Sstephen hemminger 			break;
803661b7972Sstephen hemminger 		}
804661b7972Sstephen hemminger 
805661b7972Sstephen hemminger 		default:
806661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
807661b7972Sstephen hemminger 			return -EINVAL;
808661b7972Sstephen hemminger 		}
809661b7972Sstephen hemminger 	}
810661b7972Sstephen hemminger 
811661b7972Sstephen hemminger 	return 0;
812661b7972Sstephen hemminger }
813661b7972Sstephen hemminger 
81427a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
81527a3421eSPatrick McHardy 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
81627a3421eSPatrick McHardy 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
81727a3421eSPatrick McHardy 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
8187bc0f28cSHagen Paul Pfeifer 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
819661b7972Sstephen hemminger 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
820e4ae004bSEric Dumazet 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
8216a031f67SYang Yingliang 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
82227a3421eSPatrick McHardy };
82327a3421eSPatrick McHardy 
8242c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
8252c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
8262c10b32bSThomas Graf {
8272c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
8282c10b32bSThomas Graf 
829661b7972Sstephen hemminger 	if (nested_len < 0) {
830661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
8312c10b32bSThomas Graf 		return -EINVAL;
832661b7972Sstephen hemminger 	}
833661b7972Sstephen hemminger 
8342c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
8352c10b32bSThomas Graf 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
836fceb6435SJohannes Berg 				 nested_len, policy, NULL);
837661b7972Sstephen hemminger 
8382c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
8392c10b32bSThomas Graf 	return 0;
8402c10b32bSThomas Graf }
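/* Illustrative layout of the TCA_OPTIONS payload handled above (not to
 * scale):
 *
 *	struct tc_netem_qopt	<- fixed legacy header, read by netem_change()
 *	NLA_ALIGN() padding
 *	TCA_NETEM_CORR / _REORDER / _CORRUPT / _RATE / _LOSS / ... attributes
 *
 * which is why parsing starts at nla_data(nla) + NLA_ALIGN(len) rather than
 * using a plain nested parse.
 */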
8412c10b32bSThomas Graf 
842c865e5d9SStephen Hemminger /* Parse netlink message to set options */
8431e90474cSPatrick McHardy static int netem_change(struct Qdisc *sch, struct nlattr *opt)
8441da177e4SLinus Torvalds {
8451da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
846b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
8471da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
84854a4b05cSYang Yingliang 	struct clgstate old_clg;
84954a4b05cSYang Yingliang 	int old_loss_model = CLG_RANDOM;
8501da177e4SLinus Torvalds 	int ret;
8511da177e4SLinus Torvalds 
852b03f4672SPatrick McHardy 	if (opt == NULL)
8531da177e4SLinus Torvalds 		return -EINVAL;
8541da177e4SLinus Torvalds 
8552c10b32bSThomas Graf 	qopt = nla_data(opt);
8562c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
857b03f4672SPatrick McHardy 	if (ret < 0)
858b03f4672SPatrick McHardy 		return ret;
859b03f4672SPatrick McHardy 
86054a4b05cSYang Yingliang 	/* backup q->clg and q->loss_model */
86154a4b05cSYang Yingliang 	old_clg = q->clg;
86254a4b05cSYang Yingliang 	old_loss_model = q->loss_model;
86354a4b05cSYang Yingliang 
86454a4b05cSYang Yingliang 	if (tb[TCA_NETEM_LOSS]) {
86549545a77SYang Yingliang 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
86654a4b05cSYang Yingliang 		if (ret) {
86754a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
86854a4b05cSYang Yingliang 			return ret;
86954a4b05cSYang Yingliang 		}
87054a4b05cSYang Yingliang 	} else {
87154a4b05cSYang Yingliang 		q->loss_model = CLG_RANDOM;
87254a4b05cSYang Yingliang 	}
87354a4b05cSYang Yingliang 
87454a4b05cSYang Yingliang 	if (tb[TCA_NETEM_DELAY_DIST]) {
87554a4b05cSYang Yingliang 		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
87654a4b05cSYang Yingliang 		if (ret) {
87754a4b05cSYang Yingliang 			/* recover clg and loss_model, in case
87854a4b05cSYang Yingliang 			 * q->clg and q->loss_model were modified
87954a4b05cSYang Yingliang 			 * in get_loss_clg()
88054a4b05cSYang Yingliang 			 */
88154a4b05cSYang Yingliang 			q->clg = old_clg;
88254a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
88354a4b05cSYang Yingliang 			return ret;
88454a4b05cSYang Yingliang 		}
88554a4b05cSYang Yingliang 	}
88654a4b05cSYang Yingliang 
88750612537SEric Dumazet 	sch->limit = qopt->limit;
8881da177e4SLinus Torvalds 
889112f9cb6SDave Taht 	q->latency = PSCHED_TICKS2NS(qopt->latency);
890112f9cb6SDave Taht 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
8911da177e4SLinus Torvalds 	q->limit = qopt->limit;
8921da177e4SLinus Torvalds 	q->gap = qopt->gap;
8930dca51d3SStephen Hemminger 	q->counter = 0;
8941da177e4SLinus Torvalds 	q->loss = qopt->loss;
8951da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
8961da177e4SLinus Torvalds 
897bb2f8cc0SStephen Hemminger 	/* For compatibility with earlier versions:
898bb2f8cc0SStephen Hemminger 	 * if gap is set, assume 100% reorder probability.
8990dca51d3SStephen Hemminger 	 */
900a362e0a7SStephen Hemminger 	if (q->gap)
9010dca51d3SStephen Hemminger 		q->reorder = ~0;
9020dca51d3SStephen Hemminger 
903265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
90449545a77SYang Yingliang 		get_correlation(q, tb[TCA_NETEM_CORR]);
9051da177e4SLinus Torvalds 
906265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
90749545a77SYang Yingliang 		get_reorder(q, tb[TCA_NETEM_REORDER]);
9081da177e4SLinus Torvalds 
909265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
91049545a77SYang Yingliang 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
9111da177e4SLinus Torvalds 
9127bc0f28cSHagen Paul Pfeifer 	if (tb[TCA_NETEM_RATE])
91349545a77SYang Yingliang 		get_rate(q, tb[TCA_NETEM_RATE]);
9147bc0f28cSHagen Paul Pfeifer 
9156a031f67SYang Yingliang 	if (tb[TCA_NETEM_RATE64])
9166a031f67SYang Yingliang 		q->rate = max_t(u64, q->rate,
9176a031f67SYang Yingliang 				nla_get_u64(tb[TCA_NETEM_RATE64]));
9186a031f67SYang Yingliang 
919e4ae004bSEric Dumazet 	if (tb[TCA_NETEM_ECN])
920e4ae004bSEric Dumazet 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
921e4ae004bSEric Dumazet 
922661b7972Sstephen hemminger 	return ret;
9231da177e4SLinus Torvalds }
9241da177e4SLinus Torvalds 
9251e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt)
9261da177e4SLinus Torvalds {
9271da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
9281da177e4SLinus Torvalds 	int ret;
9291da177e4SLinus Torvalds 
930634576a1SNikolay Aleksandrov 	qdisc_watchdog_init(&q->watchdog, sch);
931634576a1SNikolay Aleksandrov 
9321da177e4SLinus Torvalds 	if (!opt)
9331da177e4SLinus Torvalds 		return -EINVAL;
9341da177e4SLinus Torvalds 
935661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
9361da177e4SLinus Torvalds 	ret = netem_change(sch, opt);
93750612537SEric Dumazet 	if (ret)
938250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
9391da177e4SLinus Torvalds 	return ret;
9401da177e4SLinus Torvalds }
9411da177e4SLinus Torvalds 
9421da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
9431da177e4SLinus Torvalds {
9441da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
9451da177e4SLinus Torvalds 
94659cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
94750612537SEric Dumazet 	if (q->qdisc)
9481da177e4SLinus Torvalds 		qdisc_destroy(q->qdisc);
9496373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
9501da177e4SLinus Torvalds }
9511da177e4SLinus Torvalds 
952661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
953661b7972Sstephen hemminger 			   struct sk_buff *skb)
954661b7972Sstephen hemminger {
955661b7972Sstephen hemminger 	struct nlattr *nest;
956661b7972Sstephen hemminger 
957661b7972Sstephen hemminger 	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
958661b7972Sstephen hemminger 	if (nest == NULL)
959661b7972Sstephen hemminger 		goto nla_put_failure;
960661b7972Sstephen hemminger 
961661b7972Sstephen hemminger 	switch (q->loss_model) {
962661b7972Sstephen hemminger 	case CLG_RANDOM:
963661b7972Sstephen hemminger 		/* legacy loss model */
964661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
965661b7972Sstephen hemminger 		return 0;	/* no data */
966661b7972Sstephen hemminger 
967661b7972Sstephen hemminger 	case CLG_4_STATES: {
968661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
969661b7972Sstephen hemminger 			.p13 = q->clg.a1,
970661b7972Sstephen hemminger 			.p31 = q->clg.a2,
971661b7972Sstephen hemminger 			.p32 = q->clg.a3,
972661b7972Sstephen hemminger 			.p14 = q->clg.a4,
973661b7972Sstephen hemminger 			.p23 = q->clg.a5,
974661b7972Sstephen hemminger 		};
975661b7972Sstephen hemminger 
9761b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
9771b34ec43SDavid S. Miller 			goto nla_put_failure;
978661b7972Sstephen hemminger 		break;
979661b7972Sstephen hemminger 	}
980661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
981661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
982661b7972Sstephen hemminger 			.p = q->clg.a1,
983661b7972Sstephen hemminger 			.r = q->clg.a2,
984661b7972Sstephen hemminger 			.h = q->clg.a3,
985661b7972Sstephen hemminger 			.k1 = q->clg.a4,
986661b7972Sstephen hemminger 		};
987661b7972Sstephen hemminger 
9881b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
9891b34ec43SDavid S. Miller 			goto nla_put_failure;
990661b7972Sstephen hemminger 		break;
991661b7972Sstephen hemminger 	}
992661b7972Sstephen hemminger 	}
993661b7972Sstephen hemminger 
994661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
995661b7972Sstephen hemminger 	return 0;
996661b7972Sstephen hemminger 
997661b7972Sstephen hemminger nla_put_failure:
998661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
999661b7972Sstephen hemminger 	return -1;
1000661b7972Sstephen hemminger }
1001661b7972Sstephen hemminger 
10021da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
10031da177e4SLinus Torvalds {
10041da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
1005861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
10061da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
10071da177e4SLinus Torvalds 	struct tc_netem_corr cor;
10080dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
1009c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
10107bc0f28cSHagen Paul Pfeifer 	struct tc_netem_rate rate;
10111da177e4SLinus Torvalds 
1012112f9cb6SDave Taht 	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
1013112f9cb6SDave Taht 			     UINT_MAX);
1014112f9cb6SDave Taht 	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
1015112f9cb6SDave Taht 			    UINT_MAX);
10161da177e4SLinus Torvalds 	qopt.limit = q->limit;
10171da177e4SLinus Torvalds 	qopt.loss = q->loss;
10181da177e4SLinus Torvalds 	qopt.gap = q->gap;
10191da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
10201b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
10211b34ec43SDavid S. Miller 		goto nla_put_failure;
10221da177e4SLinus Torvalds 
10231da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
10241da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
10251da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
10261b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
10271b34ec43SDavid S. Miller 		goto nla_put_failure;
10280dca51d3SStephen Hemminger 
10290dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
10300dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
10311b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
10321b34ec43SDavid S. Miller 		goto nla_put_failure;
10330dca51d3SStephen Hemminger 
1034c865e5d9SStephen Hemminger 	corrupt.probability = q->corrupt;
1035c865e5d9SStephen Hemminger 	corrupt.correlation = q->corrupt_cor.rho;
10361b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
10371b34ec43SDavid S. Miller 		goto nla_put_failure;
1038c865e5d9SStephen Hemminger 
10396a031f67SYang Yingliang 	if (q->rate >= (1ULL << 32)) {
10402a51c1e8SNicolas Dichtel 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
10412a51c1e8SNicolas Dichtel 				      TCA_NETEM_PAD))
10426a031f67SYang Yingliang 			goto nla_put_failure;
10436a031f67SYang Yingliang 		rate.rate = ~0U;
10446a031f67SYang Yingliang 	} else {
10457bc0f28cSHagen Paul Pfeifer 		rate.rate = q->rate;
10466a031f67SYang Yingliang 	}
104790b41a1cSHagen Paul Pfeifer 	rate.packet_overhead = q->packet_overhead;
104890b41a1cSHagen Paul Pfeifer 	rate.cell_size = q->cell_size;
104990b41a1cSHagen Paul Pfeifer 	rate.cell_overhead = q->cell_overhead;
10501b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
10511b34ec43SDavid S. Miller 		goto nla_put_failure;
10527bc0f28cSHagen Paul Pfeifer 
1053e4ae004bSEric Dumazet 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1054e4ae004bSEric Dumazet 		goto nla_put_failure;
1055e4ae004bSEric Dumazet 
1056661b7972Sstephen hemminger 	if (dump_loss_model(q, skb) != 0)
1057661b7972Sstephen hemminger 		goto nla_put_failure;
1058661b7972Sstephen hemminger 
1059861d7f74Sstephen hemminger 	return nla_nest_end(skb, nla);
10601da177e4SLinus Torvalds 
10611e90474cSPatrick McHardy nla_put_failure:
1062861d7f74Sstephen hemminger 	nlmsg_trim(skb, nla);
10631da177e4SLinus Torvalds 	return -1;
10641da177e4SLinus Torvalds }
10651da177e4SLinus Torvalds 
106610f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
106710f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
106810f6dfcfSstephen hemminger {
106910f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
107010f6dfcfSstephen hemminger 
107150612537SEric Dumazet 	if (cl != 1 || !q->qdisc) 	/* only one class */
107210f6dfcfSstephen hemminger 		return -ENOENT;
107310f6dfcfSstephen hemminger 
107410f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
107510f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
107610f6dfcfSstephen hemminger 
107710f6dfcfSstephen hemminger 	return 0;
107810f6dfcfSstephen hemminger }
107910f6dfcfSstephen hemminger 
108010f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
108110f6dfcfSstephen hemminger 		     struct Qdisc **old)
108210f6dfcfSstephen hemminger {
108310f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
108410f6dfcfSstephen hemminger 
108586a7996cSWANG Cong 	*old = qdisc_replace(sch, new, &q->qdisc);
108610f6dfcfSstephen hemminger 	return 0;
108710f6dfcfSstephen hemminger }
108810f6dfcfSstephen hemminger 
108910f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
109010f6dfcfSstephen hemminger {
109110f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
109210f6dfcfSstephen hemminger 	return q->qdisc;
109310f6dfcfSstephen hemminger }
109410f6dfcfSstephen hemminger 
1095143976ceSWANG Cong static unsigned long netem_find(struct Qdisc *sch, u32 classid)
109610f6dfcfSstephen hemminger {
109710f6dfcfSstephen hemminger 	return 1;
109810f6dfcfSstephen hemminger }
109910f6dfcfSstephen hemminger 
110010f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
110110f6dfcfSstephen hemminger {
110210f6dfcfSstephen hemminger 	if (!walker->stop) {
110310f6dfcfSstephen hemminger 		if (walker->count >= walker->skip)
110410f6dfcfSstephen hemminger 			if (walker->fn(sch, 1, walker) < 0) {
110510f6dfcfSstephen hemminger 				walker->stop = 1;
110610f6dfcfSstephen hemminger 				return;
110710f6dfcfSstephen hemminger 			}
110810f6dfcfSstephen hemminger 		walker->count++;
110910f6dfcfSstephen hemminger 	}
111010f6dfcfSstephen hemminger }
111110f6dfcfSstephen hemminger 
111210f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = {
111310f6dfcfSstephen hemminger 	.graft		=	netem_graft,
111410f6dfcfSstephen hemminger 	.leaf		=	netem_leaf,
1115143976ceSWANG Cong 	.find		=	netem_find,
111610f6dfcfSstephen hemminger 	.walk		=	netem_walk,
111710f6dfcfSstephen hemminger 	.dump		=	netem_dump_class,
111810f6dfcfSstephen hemminger };
111910f6dfcfSstephen hemminger 
112020fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
11211da177e4SLinus Torvalds 	.id		=	"netem",
112210f6dfcfSstephen hemminger 	.cl_ops		=	&netem_class_ops,
11231da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
11241da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
11251da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
112677be155cSJarek Poplawski 	.peek		=	qdisc_peek_dequeued,
11271da177e4SLinus Torvalds 	.init		=	netem_init,
11281da177e4SLinus Torvalds 	.reset		=	netem_reset,
11291da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
11301da177e4SLinus Torvalds 	.change		=	netem_change,
11311da177e4SLinus Torvalds 	.dump		=	netem_dump,
11321da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
11331da177e4SLinus Torvalds };
11341da177e4SLinus Torvalds 
11351da177e4SLinus Torvalds 
11361da177e4SLinus Torvalds static int __init netem_module_init(void)
11371da177e4SLinus Torvalds {
1138eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
11391da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
11401da177e4SLinus Torvalds }
11411da177e4SLinus Torvalds static void __exit netem_module_exit(void)
11421da177e4SLinus Torvalds {
11431da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
11441da177e4SLinus Torvalds }
11451da177e4SLinus Torvalds module_init(netem_module_init)
11461da177e4SLinus Torvalds module_exit(netem_module_exit)
11471da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1148