/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full-blown network emulator like NIST Net. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table, to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/

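/* Example of use (via the iproute2 "tc" utility; shown here for
 * illustration only, the exact option syntax is defined by tc):
 *
 *	# delay 100ms +/- 10ms, 25% correlated with the previous delay
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 *	# add 0.3% random loss and 1% duplication on top
 *	tc qdisc change dev eth0 root netem delay 100ms 10ms 25% \
 *		loss 0.3% duplicate 1%
 *
 * These options reach netem_change() below as a struct tc_netem_qopt
 * plus optional TCA_NETEM_* attributes.
 */
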
struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

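/* Worked example of the blend above (illustrative userspace sketch,
 * not compiled here; net_random() is replaced by caller-supplied
 * samples). get_crandom() computes
 *
 *	answer = (1 - r) * value + r * last,  r = (rho + 1) / 2^32
 *
 * in 32.32 fixed point, so rho = 0x80000000 pulls each new sample
 * about halfway toward the previous one:
 */
#if 0
#include <stdint.h>

static uint32_t crandom_blend(uint32_t value, uint32_t last, uint32_t rho)
{
	uint64_t r = (uint64_t)rho + 1;

	/* e.g. value = 0, last = 1000000, rho = 0x80000000 -> ~500000 */
	return (uint32_t)(((uint64_t)value * ((1ull << 32) - r) +
			   (uint64_t)last * r) >> 32);
}
#endif
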
/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}

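/* The a1...a5 thresholds above are compared directly against
 * net_random(), so a transition probability must be scaled to the
 * full u32 range by userspace. A hypothetical helper showing the
 * conversion (tc does the equivalent when it fills in
 * struct tc_netem_gimodel; illustrative only, not compiled here):
 */
#if 0
#include <stdint.h>

/* 25.0% -> ~0x40000000, 100% -> 0xffffffff */
static uint32_t percent_to_u32(double percent)
{
	return (uint32_t)(percent / 100.0 * 4294967295.0);
}
#endif
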
/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;	/* do not fall through into the state 2 checks */
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}

	return false;
}

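/* Illustrative userspace simulation of the Simple Gilbert special
 * case (state 1 = good, state 2 = bad; p, r and the bad-state loss
 * probability correspond to a1, a2 and a3 above). Not compiled here;
 * rand() stands in for net_random() and probabilities are plain
 * doubles instead of u32 fixed point:
 */
#if 0
#include <stdlib.h>

struct ge {
	int state;		/* 1 = good, 2 = bad */
	double p;		/* P(good -> bad) */
	double r;		/* P(bad -> good) */
	double loss_bad;	/* loss probability while in bad state */
};

static int ge_step(struct ge *g)	/* returns 1 if the packet is lost */
{
	double u = (double)rand() / RAND_MAX;

	if (g->state == 1) {
		if (u < g->p)
			g->state = 2;
		return 0;	/* Simple Gilbert: no loss in the good state */
	}
	if (u < g->r)
		g->state = 1;
	/* as in loss_gilb_ell(), loss is drawn from the state we were in */
	return ((double)rand() / RAND_MAX) < g->loss_bad;
}
#endif
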
static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model):
		 * extracts a value from the Markov 4-state loss generator;
		 * if it is true, the packet is dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * extracts a value from the Gilbert-Elliot loss generator;
		 * if it is true, the packet is dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

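/* Worked example of the fixed-point arithmetic in tabledist(): with
 * mu = 100000, sigma = 10000 (scheduler ticks) and a table entry
 * t = NETEM_DIST_SCALE = 8192 (a sample at +1 standard deviation),
 * sigma is split into remainder and quotient so the intermediate
 * product cannot overflow:
 *
 *	x = (10000 % 8192) * 8192 = 1808 * 8192
 *	x/8192 + (10000/8192) * 8192 + 100000
 *	  = 1808 + 8192 + 100000 = 110000 = mu + 1*sigma
 *
 * i.e. the result is mu + t*sigma/NETEM_DIST_SCALE, with rounding.
 */
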
static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}

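/* Worked example: with q->rate = 125000 (bytes per second, i.e.
 * 1 Mbit/s) and no overheads, a 1500-byte packet gives
 * ticks = 1500 * NSEC_PER_SEC / 125000 = 12ms of transmission time.
 * With cell_size = 48 and cell_overhead = 5 (ATM-like framing), the
 * same packet is first rounded up to 32 cells of 53 bytes = 1696
 * bytes, i.e. ~13.6ms.
 */
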
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < sch->limit)) {
		skb = skb_peek_tail(list);
		/* Optimize for add at tail */
		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
			return qdisc_enqueue_tail(nskb, sch);

		skb_queue_reverse_walk(list, skb) {
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);
		sch->qstats.backlog += qdisc_pkt_len(nskb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed, since we are going to modify the packet.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight,
				 * so calculate this time bonus and subtract it
				 * from the delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		ret = tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.backlog += qdisc_pkt_len(skb);
		sch->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (!len && q->qdisc && q->qdisc->ops->drop)
		len = q->qdisc->ops->drop(q->qdisc);
	if (len)
		sch->qstats.drops++;

	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* has the packet's scheduled send time arrived? */
		if (cb->time_to_send <= psched_get_time()) {
			skb = qdisc_dequeue_tail(sch);
			if (unlikely(!skb))
				goto qdisc_dequeue;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

qdisc_dequeue:
	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

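/* The table consumed above is generated in userspace (iproute2 ships
 * a "maketable" tool that builds one from measured delay data). A
 * hypothetical generator for a normal distribution, shown for
 * illustration only and not compiled here -- each entry is a
 * deviation in units of NETEM_DIST_SCALE (8192), so 8192 means
 * "+1 sigma":
 */
#if 0
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

static void emit_normal_table(int n)
{
	int i;

	for (i = 0; i < n; i++) {
		/* Box-Muller transform: one standard normal sample */
		double u1 = (rand() + 1.0) / (RAND_MAX + 2.0);
		double u2 = (rand() + 1.0) / (RAND_MAX + 2.0);
		double z = sqrt(-2.0 * log(u1)) * cos(2.0 * M_PI * u2);
		int v = (int)(z * 8192);

		if (v > 32767)		/* clamp to the s16 table entries */
			v = 32767;
		if (v < -32768)
			v = -32768;
		printf("%d\n", v);
	}
}
#endif
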
static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

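/* Sketch of the TCA_OPTIONS payload layout that parse_attr() expects:
 * a bare struct tc_netem_qopt, optionally followed by NLA_ALIGN'ed
 * netlink attributes such as TCA_NETEM_CORR. Hypothetical userspace
 * helper, shown for illustration only (buffer sizing and the
 * enclosing RTM_NEWQDISC message are omitted):
 */
#if 0
#include <string.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>

static size_t pack_netem_opts(char *buf, const struct tc_netem_qopt *qopt,
			      const struct tc_netem_corr *corr)
{
	struct nlattr *nla;
	size_t off;

	memcpy(buf, qopt, sizeof(*qopt));	/* legacy fixed header */
	off = NLA_ALIGN(sizeof(*qopt));

	nla = (struct nlattr *)(buf + off);	/* then ordinary attributes */
	nla->nla_type = TCA_NETEM_CORR;
	nla->nla_len = NLA_HDRLEN + sizeof(*corr);
	memcpy((char *)nla + NLA_HDRLEN, corr, sizeof(*corr));
	off += NLA_ALIGN(nla->nla_len);

	return off;
}
#endif
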
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");