/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated losses according to the
	"Gilbert-Elliot" model and a 4-state Markov chain model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	} _4_state_model;

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	} GE_state_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save the skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
	ktime_t		tstamp_save;
};


static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
{
	return rb_entry(rb, struct sk_buff, rbnode);
}

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = prandom_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return prandom_u32();

	value = prandom_u32();
	rho = (u64)state->rho + 1;
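	/* Fixed-point blend of the fresh random value with the previous
	 * output: answer = ((2^32 - rho) * value + rho * last) >> 32,
	 * i.e. rho/2^32 is the correlation weight given to the last value.
	 */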
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = prandom_u32();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_BURST_PERIOD => isolated losses within a gap period
	 *   LOST_IN_GAP_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}

		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}

		break;
	case LOST_IN_GAP_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		}
		break;
	case LOST_IN_BURST_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between a random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case GOOD_STATE:
		if (prandom_u32() < clg->a1)
			clg->state = BAD_STATE;
		if (prandom_u32() < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (prandom_u32() < clg->a2)
			clg->state = GOOD_STATE;
		if (prandom_u32() > clg->a3)
			return true;
	}

	return false;
}

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model).
		 * Extracts a value from the Markov 4-state loss generator;
		 * if it is 1, drops a packet and, if needed, writes the event
		 * to the kernel log.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm.
		 * Extracts a value from the Gilbert-Elliot loss generator;
		 * if it is 1, drops a packet and, if needed, writes the event
		 * to the kernel log.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

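	/* The table is expected to hold samples of the target distribution,
	 * scaled so that NETEM_DIST_SCALE corresponds to one standard
	 * deviation.  The result below is roughly mu + sigma * t / SCALE,
	 * computed in two steps (remainder first, then whole multiples of
	 * NETEM_DIST_SCALE) with rounding, to limit intermediate overflow.
	 */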
	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

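	/* Transmission time for len bytes at q->rate bytes/sec:
	 * ns = len * NSEC_PER_SEC / rate, then converted to psched ticks.
	 */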
	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}

static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	while ((p = rb_first(&q->t_root))) {
		struct sk_buff *skb = netem_rb_to_skb(p);

		rb_erase(p, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

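	/* Insert into the rbtree ordered by time_to_send; equal timestamps
	 * go to the right so packets with the same send time stay in
	 * arrival (FIFO) order.
	 */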
	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = netem_rb_to_skb(parent);
		if (tnext >= netem_skb_cb(skb)->time_to_send)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color(&nskb->rbnode, &q->t_root);
	sch->q.qlen++;
}

/* netem can't properly corrupt a megapacket (like we get from GSO), so when
 * we statistically choose to corrupt one, we instead segment it, returning
 * the first packet to be corrupted, and re-enqueue the remaining frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}

static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
{
	skb->next = qh->head;

	if (!qh->head)
		qh->tail = skb;
	qh->head = skb;
	qh->qlen++;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	struct sk_buff *segs = NULL;
	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
	int nb = 0;
	int count = 1;
	int rc = NET_XMIT_SUCCESS;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (skb_is_gso(skb)) {
			segs = netem_segment(skb, sch, to_free);
			if (!segs)
				return NET_XMIT_DROP;
		} else {
			segs = skb;
		}

		skb = segs;
		segs = segs->next;

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			goto finish_segs;
		}

		skb->data[prandom_u32() % skb_headlen(skb)] ^=
			1<<(prandom_u32() % 8);
	}

	if (unlikely(sch->q.qlen >= sch->limit))
		return qdisc_drop(skb, sch, to_free);

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
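	/* Most packets take the delayed path below.  When reordering is
	 * configured, every gap'th packet (subject to the reorder
	 * probability/correlation) skips the delay and is queued at the
	 * head instead, which is what produces the reordering.
	 */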
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = netem_rb_to_skb(rb_last(&q->t_root));
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send) {
					last = t_last;
				}
			}

			if (last) {
				/*
				 * Last packet in queue is the reference point (now);
				 * calculate this time bonus and subtract it
				 * from the delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(psched_tdiff_t, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		cb->tstamp_save = skb->tstamp;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		netem_enqueue_skb_head(&sch->q, skb);
		sch->qstats.requeues++;
	}


finish_segs:
	if (segs) {
		while (segs) {
			skb2 = segs->next;
			segs->next = NULL;
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		sch->q.qlen += nb;
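		/* The parent already accounted for one skb of prev_len bytes;
		 * now that it became nb segments totalling len bytes, fix up
		 * the qlen/backlog difference in the ancestor qdiscs.
		 */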
		if (nb > 1)
			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	}
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	struct rb_node *p;

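	/* Dequeue order: first drain packets that were re-queued at the head
	 * of sch->q (reordered packets, due immediately), then packets in the
	 * time-ordered rbtree whose time_to_send has arrived, optionally
	 * passing them through the child qdisc when one is attached.
	 */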
tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
		qdisc_qstats_backlog_dec(sch, skb);
deliver:
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	p = rb_first(&q->t_root);
	if (p) {
		psched_time_t time_to_send;

		skb = netem_rb_to_skb(p);

		/* is it time to send this packet yet? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (time_to_send <= psched_get_time()) {
			rb_erase(p, &q->t_root);

			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			skb->tstamp = netem_skb_cb(skb)->tstamp_save;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (skb->tc_redirected && skb->tc_from_ingress)
				skb->tstamp = 0;
#endif

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS &&
				    net_xmit_drop_count(err)) {
					qdisc_qstats_drop(sch);
					qdisc_tree_reduce_backlog(sch, 1,
								  pkt_len);
				}
				goto tfifo_dequeue;
			}
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, time_to_send);
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

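	/* Publish the new table under the root lock so the fast path never
	 * sees a half-built table; after swap(), 'd' points to the old table,
	 * which is freed outside the lock.
	 */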
	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}

771661b7972Sstephen hemminger {
772661b7972Sstephen hemminger 	const struct nlattr *la;
773661b7972Sstephen hemminger 	int rem;
774661b7972Sstephen hemminger 
775661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
776661b7972Sstephen hemminger 		u16 type = nla_type(la);
777661b7972Sstephen hemminger 
778661b7972Sstephen hemminger 		switch (type) {
779661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
780661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
781661b7972Sstephen hemminger 
7822494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
783661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
784661b7972Sstephen hemminger 				return -EINVAL;
785661b7972Sstephen hemminger 			}
786661b7972Sstephen hemminger 
787661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
788661b7972Sstephen hemminger 
7893fbac2a8SYang Yingliang 			q->clg.state = TX_IN_GAP_PERIOD;
790661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
791661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
792661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
793661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
794661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
795661b7972Sstephen hemminger 			break;
796661b7972Sstephen hemminger 		}
797661b7972Sstephen hemminger 
798661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
799661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
800661b7972Sstephen hemminger 
8012494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
8022494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
803661b7972Sstephen hemminger 				return -EINVAL;
804661b7972Sstephen hemminger 			}
805661b7972Sstephen hemminger 
806661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
8073fbac2a8SYang Yingliang 			q->clg.state = GOOD_STATE;
808661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
809661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
810661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
811661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
812661b7972Sstephen hemminger 			break;
813661b7972Sstephen hemminger 		}
814661b7972Sstephen hemminger 
815661b7972Sstephen hemminger 		default:
816661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
817661b7972Sstephen hemminger 			return -EINVAL;
818661b7972Sstephen hemminger 		}
819661b7972Sstephen hemminger 	}
820661b7972Sstephen hemminger 
821661b7972Sstephen hemminger 	return 0;
822661b7972Sstephen hemminger }
823661b7972Sstephen hemminger 
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	struct clgstate old_clg;
	int old_loss_model = CLG_RANDOM;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
	old_loss_model = q->loss_model;

	if (tb[TCA_NETEM_LOSS]) {
		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
		if (ret) {
			q->loss_model = old_loss_model;
			return ret;
		}
	} else {
		q->loss_model = CLG_RANDOM;
	}

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret) {
			/* recover clg and loss_model, in case
			 * q->clg and q->loss_model were modified
			 * in get_loss_clg()
			 */
			q->clg = old_clg;
			q->loss_model = old_loss_model;
			return ret;
		}
	}

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(q, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_REORDER])
		get_reorder(q, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(q, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_RATE64])
		q->rate = max_t(u64, q->rate,
				nla_get_u64(tb[TCA_NETEM_RATE64]));

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

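	/* Rates that do not fit in the legacy 32-bit field are exported via
	 * TCA_NETEM_RATE64 and the 32-bit field is saturated to ~0U.
	 */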
	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");