xref: /openbmc/linux/net/sched/sch_netem.c (revision cd961c2c)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7798b6b19SStephen Hemminger  * 		2 of the License.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
16b7f080cfSAlexey Dobriyan #include <linux/mm.h>
171da177e4SLinus Torvalds #include <linux/module.h>
185a0e3ad6STejun Heo #include <linux/slab.h>
191da177e4SLinus Torvalds #include <linux/types.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/skbuff.h>
2378776d3fSDavid S. Miller #include <linux/vmalloc.h>
241da177e4SLinus Torvalds #include <linux/rtnetlink.h>
2590b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h>
261da177e4SLinus Torvalds 
27dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
281da177e4SLinus Torvalds #include <net/pkt_sched.h>
291da177e4SLinus Torvalds 
30250a65f7Sstephen hemminger #define VERSION "1.3"
31eb229c4cSStephen Hemminger 
321da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
331da177e4SLinus Torvalds 	====================================
341da177e4SLinus Torvalds 
351da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
361da177e4SLinus Torvalds 		 Network Emulation Tool"
371da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
381da177e4SLinus Torvalds 
391da177e4SLinus Torvalds 	 ----------------------------------------------------------------
401da177e4SLinus Torvalds 
411da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
421da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
431da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
441da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
451da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
461da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
471da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
481da177e4SLinus Torvalds 
491da177e4SLinus Torvalds 	 This qdisc does not do classification that can be handled in
501da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
511da177e4SLinus Torvalds 	 control either since that can be handled by using token
521da177e4SLinus Torvalds 	 bucket or other rate control.
53661b7972Sstephen hemminger 
54661b7972Sstephen hemminger      Correlated Loss Generator models
55661b7972Sstephen hemminger 
56661b7972Sstephen hemminger 	Added generation of correlated loss according to the
57661b7972Sstephen hemminger 	"Gilbert-Elliot" model, a 4-state markov model.
58661b7972Sstephen hemminger 
59661b7972Sstephen hemminger 	References:
60661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
61661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
62661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
63661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
64661b7972Sstephen hemminger 
65661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it
66661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
671da177e4SLinus Torvalds */
681da177e4SLinus Torvalds 
struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	/* timer that fires when the head packet's time_to_send arrives */
	struct qdisc_watchdog watchdog;

	/* base delay and random jitter, in psched ticks */
	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;		/* loss probability: 0 => none, ~0 => all */
	u32 limit;		/* max length of the internal tfifo queue */
	u32 counter;		/* packets queued since last forced reorder */
	u32 gap;		/* reorder one out of every 'gap' packets */
	u32 duplicate;		/* duplication probability */
	u32 reorder;		/* reordering probability */
	u32 corrupt;		/* single-bit corruption probability */
	u32 rate;		/* rate shaping in bytes/sec (0 => disabled) */
	s32 packet_overhead;	/* per-packet length adjustment for rate calc */
	u32 cell_size;		/* cell size for ATM-like cell accounting */
	u32 cell_size_reciprocal; /* precomputed for reciprocal_divide() */
	s32 cell_overhead;	/* per-cell length adjustment */

	/* one correlated random generator per randomized parameter */
	struct crndstate {
		u32 last;	/* previous output, blended into the next */
		u32 rho;	/* correlation coefficient scaled to u32 */
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	/* optional user-loaded delay distribution table (see tabledist) */
	struct disttable {
		u32  size;
		s16 table[0];	/* 'size' signed 16-bit entries follow */
	} *delay_dist;

	enum  {
		CLG_RANDOM,	/* independent random losses */
		CLG_4_STATES,	/* 4-state Markov chain (GI model) */
		CLG_GILB_ELL,	/* Gilbert-Elliot model */
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};
1231da177e4SLinus Torvalds 
/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;	/* absolute departure time, psched ticks */
};
1301da177e4SLinus Torvalds 
/* Access netem's private control block inside an skb.
 * The validate call checks our cb fits in the generic qdisc cb area.
 */
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
1365f86173bSJussi Kivilinna 
1371da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1381da177e4SLinus Torvalds  * Use entropy source for initial seed.
1391da177e4SLinus Torvalds  */
1401da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1411da177e4SLinus Torvalds {
1421da177e4SLinus Torvalds 	state->rho = rho;
1431da177e4SLinus Torvalds 	state->last = net_random();
1441da177e4SLinus Torvalds }
1451da177e4SLinus Torvalds 
1461da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1471da177e4SLinus Torvalds  * Next number depends on last value.
1481da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1491da177e4SLinus Torvalds  */
150b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1511da177e4SLinus Torvalds {
1521da177e4SLinus Torvalds 	u64 value, rho;
1531da177e4SLinus Torvalds 	unsigned long answer;
1541da177e4SLinus Torvalds 
155bb2f8cc0SStephen Hemminger 	if (state->rho == 0)	/* no correlation */
1561da177e4SLinus Torvalds 		return net_random();
1571da177e4SLinus Torvalds 
1581da177e4SLinus Torvalds 	value = net_random();
1591da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1601da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1611da177e4SLinus Torvalds 	state->last = answer;
1621da177e4SLinus Torvalds 	return answer;
1631da177e4SLinus Torvalds }
1641da177e4SLinus Torvalds 
165661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
166661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
167661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
168661b7972Sstephen hemminger  */
169661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
170661b7972Sstephen hemminger {
171661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
172661b7972Sstephen hemminger 	u32 rnd = net_random();
173661b7972Sstephen hemminger 
174661b7972Sstephen hemminger 	/*
17525985edcSLucas De Marchi 	 * Makes a comparison between rnd and the transition
176661b7972Sstephen hemminger 	 * probabilities outgoing from the current state, then decides the
177661b7972Sstephen hemminger 	 * next state and if the next packet has to be transmitted or lost.
178661b7972Sstephen hemminger 	 * The four states correspond to:
179661b7972Sstephen hemminger 	 *   1 => successfully transmitted packets within a gap period
180661b7972Sstephen hemminger 	 *   4 => isolated losses within a gap period
181661b7972Sstephen hemminger 	 *   3 => lost packets within a burst period
182661b7972Sstephen hemminger 	 *   2 => successfully transmitted packets within a burst period
183661b7972Sstephen hemminger 	 */
184661b7972Sstephen hemminger 	switch (clg->state) {
185661b7972Sstephen hemminger 	case 1:
186661b7972Sstephen hemminger 		if (rnd < clg->a4) {
187661b7972Sstephen hemminger 			clg->state = 4;
188661b7972Sstephen hemminger 			return true;
189661b7972Sstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1) {
190661b7972Sstephen hemminger 			clg->state = 3;
191661b7972Sstephen hemminger 			return true;
192661b7972Sstephen hemminger 		} else if (clg->a1 < rnd)
193661b7972Sstephen hemminger 			clg->state = 1;
194661b7972Sstephen hemminger 
195661b7972Sstephen hemminger 		break;
196661b7972Sstephen hemminger 	case 2:
197661b7972Sstephen hemminger 		if (rnd < clg->a5) {
198661b7972Sstephen hemminger 			clg->state = 3;
199661b7972Sstephen hemminger 			return true;
200661b7972Sstephen hemminger 		} else
201661b7972Sstephen hemminger 			clg->state = 2;
202661b7972Sstephen hemminger 
203661b7972Sstephen hemminger 		break;
204661b7972Sstephen hemminger 	case 3:
205661b7972Sstephen hemminger 		if (rnd < clg->a3)
206661b7972Sstephen hemminger 			clg->state = 2;
207661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
208661b7972Sstephen hemminger 			clg->state = 1;
209661b7972Sstephen hemminger 			return true;
210661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
211661b7972Sstephen hemminger 			clg->state = 3;
212661b7972Sstephen hemminger 			return true;
213661b7972Sstephen hemminger 		}
214661b7972Sstephen hemminger 		break;
215661b7972Sstephen hemminger 	case 4:
216661b7972Sstephen hemminger 		clg->state = 1;
217661b7972Sstephen hemminger 		break;
218661b7972Sstephen hemminger 	}
219661b7972Sstephen hemminger 
220661b7972Sstephen hemminger 	return false;
221661b7972Sstephen hemminger }
222661b7972Sstephen hemminger 
223661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
224661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
225661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
226661b7972Sstephen hemminger  *
22725985edcSLucas De Marchi  * Makes a comparison between random number and the transition
228661b7972Sstephen hemminger  * probabilities outgoing from the current state, then decides the
22925985edcSLucas De Marchi  * next state. A second random number is extracted and the comparison
230661b7972Sstephen hemminger  * with the loss probability of the current state decides if the next
231661b7972Sstephen hemminger  * packet will be transmitted or lost.
232661b7972Sstephen hemminger  */
233661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
234661b7972Sstephen hemminger {
235661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
236661b7972Sstephen hemminger 
237661b7972Sstephen hemminger 	switch (clg->state) {
238661b7972Sstephen hemminger 	case 1:
239661b7972Sstephen hemminger 		if (net_random() < clg->a1)
240661b7972Sstephen hemminger 			clg->state = 2;
241661b7972Sstephen hemminger 		if (net_random() < clg->a4)
242661b7972Sstephen hemminger 			return true;
243661b7972Sstephen hemminger 	case 2:
244661b7972Sstephen hemminger 		if (net_random() < clg->a2)
245661b7972Sstephen hemminger 			clg->state = 1;
246661b7972Sstephen hemminger 		if (clg->a3 > net_random())
247661b7972Sstephen hemminger 			return true;
248661b7972Sstephen hemminger 	}
249661b7972Sstephen hemminger 
250661b7972Sstephen hemminger 	return false;
251661b7972Sstephen hemminger }
252661b7972Sstephen hemminger 
253661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
254661b7972Sstephen hemminger {
255661b7972Sstephen hemminger 	switch (q->loss_model) {
256661b7972Sstephen hemminger 	case CLG_RANDOM:
257661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
258661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
259661b7972Sstephen hemminger 
260661b7972Sstephen hemminger 	case CLG_4_STATES:
261661b7972Sstephen hemminger 		/* 4state loss model algorithm (used also for GI model)
262661b7972Sstephen hemminger 		* Extracts a value from the markov 4 state loss generator,
263661b7972Sstephen hemminger 		* if it is 1 drops a packet and if needed writes the event in
264661b7972Sstephen hemminger 		* the kernel logs
265661b7972Sstephen hemminger 		*/
266661b7972Sstephen hemminger 		return loss_4state(q);
267661b7972Sstephen hemminger 
268661b7972Sstephen hemminger 	case CLG_GILB_ELL:
269661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm
270661b7972Sstephen hemminger 		* Extracts a value from the Gilbert-Elliot loss generator,
271661b7972Sstephen hemminger 		* if it is 1 drops a packet and if needed writes the event in
272661b7972Sstephen hemminger 		* the kernel logs
273661b7972Sstephen hemminger 		*/
274661b7972Sstephen hemminger 		return loss_gilb_ell(q);
275661b7972Sstephen hemminger 	}
276661b7972Sstephen hemminger 
277661b7972Sstephen hemminger 	return false;	/* not reached */
278661b7972Sstephen hemminger }
279661b7972Sstephen hemminger 
280661b7972Sstephen hemminger 
2811da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
2821da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
2831da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
2841da177e4SLinus Torvalds  */
285b407621cSStephen Hemminger static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
286b407621cSStephen Hemminger 				struct crndstate *state,
287b407621cSStephen Hemminger 				const struct disttable *dist)
2881da177e4SLinus Torvalds {
289b407621cSStephen Hemminger 	psched_tdiff_t x;
290b407621cSStephen Hemminger 	long t;
291b407621cSStephen Hemminger 	u32 rnd;
2921da177e4SLinus Torvalds 
2931da177e4SLinus Torvalds 	if (sigma == 0)
2941da177e4SLinus Torvalds 		return mu;
2951da177e4SLinus Torvalds 
2961da177e4SLinus Torvalds 	rnd = get_crandom(state);
2971da177e4SLinus Torvalds 
2981da177e4SLinus Torvalds 	/* default uniform distribution */
2991da177e4SLinus Torvalds 	if (dist == NULL)
3001da177e4SLinus Torvalds 		return (rnd % (2*sigma)) - sigma + mu;
3011da177e4SLinus Torvalds 
3021da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
3031da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
3041da177e4SLinus Torvalds 	if (x >= 0)
3051da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
3061da177e4SLinus Torvalds 	else
3071da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
3081da177e4SLinus Torvalds 
3091da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3101da177e4SLinus Torvalds }
3111da177e4SLinus Torvalds 
/* Convert a packet length into its transmission time at q->rate,
 * returned in psched ticks.  Caller must ensure q->rate != 0.
 */
static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	/* fixed per-packet framing adjustment (may be negative) */
	len += q->packet_overhead;

	if (q->cell_size) {
		/* round the length up to a whole number of cells */
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	/* transmission time = len / rate seconds, computed in nanoseconds */
	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}
3317bc0f28cSHagen Paul Pfeifer 
/* Insert nskb into the internal tfifo (sch->q), kept sorted by
 * ascending time_to_send.  Fails via qdisc_reshape_fail() when the
 * queue has already reached sch->limit.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < sch->limit)) {
		skb = skb_peek_tail(list);
		/* Optimize for add at tail */
		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
			return qdisc_enqueue_tail(nskb, sch);

		/* walk backwards to the last packet due no later than nskb */
		skb_queue_reverse_walk(list, skb) {
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);
		sch->qstats.backlog += qdisc_pkt_len(nskb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}
35650612537SEric Dumazet 
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;	/* copies of skb to queue: 0 => drop, 2 => duplicate */

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	/* dropped (and not duplicated): count the drop but report a
	 * bypass success so the parent does not account a queued packet
	 */
	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		/* flip one random bit within the linear data area */
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		/* base latency plus (possibly table-driven) random jitter */
		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			/* add serialization time at the configured rate */
			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight,
				 * calculate this time bonus and substract
				 * from delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		ret = tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.backlog += qdisc_pkt_len(skb);
		sch->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	return NET_XMIT_SUCCESS;
}
4741da177e4SLinus Torvalds 
4751da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc *sch)
4761da177e4SLinus Torvalds {
4771da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
47850612537SEric Dumazet 	unsigned int len;
4791da177e4SLinus Torvalds 
48050612537SEric Dumazet 	len = qdisc_queue_drop(sch);
48150612537SEric Dumazet 	if (!len && q->qdisc && q->qdisc->ops->drop)
48250612537SEric Dumazet 	    len = q->qdisc->ops->drop(q->qdisc);
48350612537SEric Dumazet 	if (len)
4841da177e4SLinus Torvalds 		sch->qstats.drops++;
48550612537SEric Dumazet 
4861da177e4SLinus Torvalds 	return len;
4871da177e4SLinus Torvalds }
4881da177e4SLinus Torvalds 
/* Dequeue the next packet whose time_to_send has arrived.
 * Packets that become due are either delivered directly or, when a
 * child qdisc is configured, funneled through it first.
 */
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	/* the watchdog is armed: nothing can be due yet */
	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* if more time remaining? */
		if (cb->time_to_send <= psched_get_time()) {
			__skb_unlink(skb, &sch->q);
			sch->qstats.backlog -= qdisc_pkt_len(skb);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			/* with a child qdisc, due packets are pushed into it
			 * and we loop to process any further due packets;
			 * delivery then happens from the child below
			 */
			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		/* head not due yet: try the child first, then arm the
		 * watchdog for the head packet's departure time
		 */
		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	/* tfifo empty: the child qdisc may still hold packets */
	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}
5481da177e4SLinus Torvalds 
/* Flush all queued packets and cancel the pending delivery timer. */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}
5581da177e4SLinus Torvalds 
/* Free a distribution table, which get_dist_table() may have obtained
 * from either kmalloc or vmalloc.
 */
static void dist_free(struct disttable *d)
{
	if (!d)
		return;

	if (is_vmalloc_addr(d))
		vfree(d);
	else
		kfree(d);
}
5686373a9a2Sstephen hemminger 
5691da177e4SLinus Torvalds /*
5701da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
5711da177e4SLinus Torvalds  * signed 16 bit values.
5721da177e4SLinus Torvalds  */
5731e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
5741da177e4SLinus Torvalds {
5751da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5766373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
5771e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
5787698b4fcSDavid S. Miller 	spinlock_t *root_lock;
5791da177e4SLinus Torvalds 	struct disttable *d;
5801da177e4SLinus Torvalds 	int i;
5816373a9a2Sstephen hemminger 	size_t s;
5821da177e4SLinus Torvalds 
583df173bdaSstephen hemminger 	if (n > NETEM_DIST_MAX)
5841da177e4SLinus Torvalds 		return -EINVAL;
5851da177e4SLinus Torvalds 
5866373a9a2Sstephen hemminger 	s = sizeof(struct disttable) + n * sizeof(s16);
587bb52c7acSEric Dumazet 	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
5886373a9a2Sstephen hemminger 	if (!d)
5896373a9a2Sstephen hemminger 		d = vmalloc(s);
5901da177e4SLinus Torvalds 	if (!d)
5911da177e4SLinus Torvalds 		return -ENOMEM;
5921da177e4SLinus Torvalds 
5931da177e4SLinus Torvalds 	d->size = n;
5941da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
5951da177e4SLinus Torvalds 		d->table[i] = data[i];
5961da177e4SLinus Torvalds 
597102396aeSJarek Poplawski 	root_lock = qdisc_root_sleeping_lock(sch);
5987698b4fcSDavid S. Miller 
5997698b4fcSDavid S. Miller 	spin_lock_bh(root_lock);
600bb52c7acSEric Dumazet 	swap(q->delay_dist, d);
6017698b4fcSDavid S. Miller 	spin_unlock_bh(root_lock);
602bb52c7acSEric Dumazet 
603bb52c7acSEric Dumazet 	dist_free(d);
6041da177e4SLinus Torvalds 	return 0;
6051da177e4SLinus Torvalds }
6061da177e4SLinus Torvalds 
607265eb67fSStephen Hemminger static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
6081da177e4SLinus Torvalds {
6091da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6101e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
6111da177e4SLinus Torvalds 
6121da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
6131da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
6141da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
6151da177e4SLinus Torvalds }
6161da177e4SLinus Torvalds 
617265eb67fSStephen Hemminger static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
6180dca51d3SStephen Hemminger {
6190dca51d3SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
6201e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
6210dca51d3SStephen Hemminger 
6220dca51d3SStephen Hemminger 	q->reorder = r->probability;
6230dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
6240dca51d3SStephen Hemminger }
6250dca51d3SStephen Hemminger 
626265eb67fSStephen Hemminger static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
627c865e5d9SStephen Hemminger {
628c865e5d9SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
6291e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
630c865e5d9SStephen Hemminger 
631c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
632c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
633c865e5d9SStephen Hemminger }
634c865e5d9SStephen Hemminger 
6357bc0f28cSHagen Paul Pfeifer static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
6367bc0f28cSHagen Paul Pfeifer {
6377bc0f28cSHagen Paul Pfeifer 	struct netem_sched_data *q = qdisc_priv(sch);
6387bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
6397bc0f28cSHagen Paul Pfeifer 
6407bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
64190b41a1cSHagen Paul Pfeifer 	q->packet_overhead = r->packet_overhead;
64290b41a1cSHagen Paul Pfeifer 	q->cell_size = r->cell_size;
64390b41a1cSHagen Paul Pfeifer 	if (q->cell_size)
64490b41a1cSHagen Paul Pfeifer 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
64590b41a1cSHagen Paul Pfeifer 	q->cell_overhead = r->cell_overhead;
6467bc0f28cSHagen Paul Pfeifer }
6477bc0f28cSHagen Paul Pfeifer 
648661b7972Sstephen hemminger static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
649661b7972Sstephen hemminger {
650661b7972Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
651661b7972Sstephen hemminger 	const struct nlattr *la;
652661b7972Sstephen hemminger 	int rem;
653661b7972Sstephen hemminger 
654661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
655661b7972Sstephen hemminger 		u16 type = nla_type(la);
656661b7972Sstephen hemminger 
657661b7972Sstephen hemminger 		switch(type) {
658661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
659661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
660661b7972Sstephen hemminger 
6612494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
662661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
663661b7972Sstephen hemminger 				return -EINVAL;
664661b7972Sstephen hemminger 			}
665661b7972Sstephen hemminger 
666661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
667661b7972Sstephen hemminger 
668661b7972Sstephen hemminger 			q->clg.state = 1;
669661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
670661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
671661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
672661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
673661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
674661b7972Sstephen hemminger 			break;
675661b7972Sstephen hemminger 		}
676661b7972Sstephen hemminger 
677661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
678661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
679661b7972Sstephen hemminger 
6802494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
6812494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
682661b7972Sstephen hemminger 				return -EINVAL;
683661b7972Sstephen hemminger 			}
684661b7972Sstephen hemminger 
685661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
686661b7972Sstephen hemminger 			q->clg.state = 1;
687661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
688661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
689661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
690661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
691661b7972Sstephen hemminger 			break;
692661b7972Sstephen hemminger 		}
693661b7972Sstephen hemminger 
694661b7972Sstephen hemminger 		default:
695661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
696661b7972Sstephen hemminger 			return -EINVAL;
697661b7972Sstephen hemminger 		}
698661b7972Sstephen hemminger 	}
699661b7972Sstephen hemminger 
700661b7972Sstephen hemminger 	return 0;
701661b7972Sstephen hemminger }
702661b7972Sstephen hemminger 
/* Netlink validation policy for netem options: the simple knobs are
 * fixed-size binary structs; the loss models travel in a nested
 * attribute and are validated separately in get_loss_clg().
 */
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};
71027a3421eSPatrick McHardy 
7112c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
7122c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
7132c10b32bSThomas Graf {
7142c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
7152c10b32bSThomas Graf 
716661b7972Sstephen hemminger 	if (nested_len < 0) {
717661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
7182c10b32bSThomas Graf 		return -EINVAL;
719661b7972Sstephen hemminger 	}
720661b7972Sstephen hemminger 
7212c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
7222c10b32bSThomas Graf 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
7232c10b32bSThomas Graf 				 nested_len, policy);
724661b7972Sstephen hemminger 
7252c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
7262c10b32bSThomas Graf 	return 0;
7272c10b32bSThomas Graf }
7282c10b32bSThomas Graf 
/* Parse netlink message to set options.
 *
 * Serves both qdisc creation (via netem_init) and live reconfiguration.
 * NOTE(review): parameters applied before get_dist_table()/get_loss_clg()
 * remain in effect even if a later step fails, i.e. an error can leave a
 * partially updated configuration -- confirm this is intended.
 */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	/* Legacy struct tc_netem_qopt comes first; optional attributes
	 * trail it and are split out by parse_attr().
	 */
	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	/* Default back to the classic independent-random loss model on
	 * every change; TCA_NETEM_LOSS may override it just below.
	 */
	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}
7851da177e4SLinus Torvalds 
7861e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt)
7871da177e4SLinus Torvalds {
7881da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7891da177e4SLinus Torvalds 	int ret;
7901da177e4SLinus Torvalds 
7911da177e4SLinus Torvalds 	if (!opt)
7921da177e4SLinus Torvalds 		return -EINVAL;
7931da177e4SLinus Torvalds 
79459cb5c67SPatrick McHardy 	qdisc_watchdog_init(&q->watchdog, sch);
7951da177e4SLinus Torvalds 
796661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
7971da177e4SLinus Torvalds 	ret = netem_change(sch, opt);
79850612537SEric Dumazet 	if (ret)
799250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
8001da177e4SLinus Torvalds 	return ret;
8011da177e4SLinus Torvalds }
8021da177e4SLinus Torvalds 
/* Qdisc teardown: stop the dequeue timer, release any grafted child
 * qdisc, and free the delay distribution table.
 */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	/* q->qdisc may be NULL when no inner qdisc was ever attached
	 * (see netem_graft()), hence the guard.
	 */
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}
8121da177e4SLinus Torvalds 
/* Emit the configured loss model as a nested TCA_NETEM_LOSS attribute.
 * Returns 0 on success (including the legacy model, which carries no
 * nested data), -1 if the skb ran out of room.
 */
static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model: no nested payload, so drop the
		 * nest we just opened rather than emit an empty one.
		 */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		/* Map the internal a1..a5 slots back to the uapi
		 * 4-state transition-probability names.
		 */
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		/* Gilbert-Elliot model parameters */
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	/* NLA_PUT() jumps here when the skb lacks tailroom. */
	nla_nest_cancel(skb, nest);
	return -1;
}
860661b7972Sstephen hemminger 
/* Dump the full netem configuration into @skb for tc.  Emits the legacy
 * struct tc_netem_qopt as TCA_OPTIONS, then the optional attributes.
 * Returns the attribute payload length, or -1 on overflow (the partial
 * output is trimmed).
 */
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	/* Remember the tail so we can end/trim the whole block at once. */
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	/* Roll the skb back to where we started; nothing partial leaks. */
	nlmsg_trim(skb, nla);
	return -1;
}
9071da177e4SLinus Torvalds 
/* Dump the single pseudo-class netem exposes.  The class (minor 1)
 * only exists while an inner qdisc is attached.
 */
static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}
92110f6dfcfSstephen hemminger 
/* Replace the inner qdisc with @new, returning the previous one in
 * @old.  @new may be NULL (detach).  Always succeeds.
 */
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	/* Swap under the tree lock so enqueue/dequeue never race with
	 * the pointer change.
	 */
	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		/* Propagate the dropped backlog up the hierarchy before
		 * resetting the detached qdisc.
		 */
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}
93810f6dfcfSstephen hemminger 
/* Return the attached inner qdisc (may be NULL when none is grafted). */
static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}
94410f6dfcfSstephen hemminger 
/* Look up a class: netem has exactly one, always identified as 1.
 * No reference counting is needed, so this is effectively a constant.
 */
static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}
94910f6dfcfSstephen hemminger 
/* Release a class reference: nothing to do, classes are not refcounted. */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}
95310f6dfcfSstephen hemminger 
95410f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
95510f6dfcfSstephen hemminger {
95610f6dfcfSstephen hemminger 	if (!walker->stop) {
95710f6dfcfSstephen hemminger 		if (walker->count >= walker->skip)
95810f6dfcfSstephen hemminger 			if (walker->fn(sch, 1, walker) < 0) {
95910f6dfcfSstephen hemminger 				walker->stop = 1;
96010f6dfcfSstephen hemminger 				return;
96110f6dfcfSstephen hemminger 			}
96210f6dfcfSstephen hemminger 		walker->count++;
96310f6dfcfSstephen hemminger 	}
96410f6dfcfSstephen hemminger }
96510f6dfcfSstephen hemminger 
/* Class operations: netem presents a single pseudo-class so that an
 * inner qdisc can be grafted below it (no tcf_block / filters).
 */
static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};
97410f6dfcfSstephen hemminger 
/* Qdisc registration table wiring the netem entry points into the
 * traffic-control core.
 */
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
9901da177e4SLinus Torvalds 
9911da177e4SLinus Torvalds 
/* Module entry point: announce the version and register the qdisc. */
static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
/* Module exit point: unregister the qdisc from the tc core. */
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
/* Standard module plumbing. */
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
1004