/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
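
/* Illustrative usage sketch (not part of the original source): netem is
 * normally configured from user space with the iproute2 'tc' tool. The
 * device name and exact option spellings below are examples and depend on
 * the installed iproute2 version:
 *
 *	# 100ms delay, 10ms jitter, 25% correlation with the previous delay
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 *	# 0.3% random loss plus 1% duplication and 0.1% corruption
 *	tc qdisc change dev eth0 root netem loss 0.3% duplicate 1% corrupt 0.1%
 */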

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
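
/* Worked example (illustration, not from the original source): with
 * rho = 0x40000000 (25% of 2^32), each call computes approximately
 *
 *	answer = 0.75 * net_random() + 0.25 * state->last
 *
 * in 32-bit fixed point, so successive values drift around the previous
 * one instead of jumping independently; rho = 0 degenerates to plain
 * net_random().
 */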

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Compares rnd with the transition probabilities out of the
	 * current state, then decides the next state and whether the
	 * next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}
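
/* Note on scaling (editorial, based on how tc encodes probabilities): the
 * a1..a5 transition probabilities arrive from user space as u32 fractions
 * of the full 32-bit range (~0 == 1.0), and rnd is uniform over the same
 * range, so the plain integer comparisons above implement the Markov
 * transitions without floating point.
 */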

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Compares a random number with the transition probabilities out of
 * the current state, then decides the next state. A second random
 * number is drawn and compared with the loss probability of the
 * current state to decide whether the next packet will be
 * transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (net_random() > clg->a3)
			return true;
	}

	return false;
}

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model):
		 * a loss decision is extracted from the 4-state Markov
		 * generator; true means the packet is dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * a loss decision is extracted from the Gilbert-Elliot
		 * generator; true means the packet is dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
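
/* Worked example (illustration only): with mu = 100ms, sigma = 10ms and no
 * table loaded, the result is uniform over [mu - sigma, mu + sigma). With a
 * table, t is a signed sample scaled by NETEM_DIST_SCALE (8192), so the
 * return value is effectively
 *
 *	mu + (t * sigma) / NETEM_DIST_SCALE
 *
 * computed as separate quotient and remainder terms to avoid overflow.
 */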

static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}
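
/* Example (illustration only): with rate = 125000 bytes/sec (1 Mbit/s) and
 * no overhead or cell accounting, a 1500 byte packet yields
 * 1500 * NSEC_PER_SEC / 125000 = 12ms of transmission time, which is then
 * converted to scheduler ticks.
 */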

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb = skb_peek_tail(list);

	/* Optimize for add at tail */
	if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
		return __skb_queue_tail(list, nskb);

	skb_queue_reverse_walk(list, skb) {
		if (tnext >= netem_skb_cb(skb)->time_to_send)
			break;
	}

	__skb_queue_after(list, skb, nskb);
}
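
/* The tfifo therefore stays sorted by time_to_send: enqueue is O(1) in the
 * common in-order case and walks backwards only when jitter makes a new
 * packet due earlier than already-queued ones.
 */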

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			sch->qstats.drops++; /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate the packet, then re-insert at the top of
	 * the qdisc tree, since the parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are going to modify the packet.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb)))
			return qdisc_drop(skb, sch);

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
		return qdisc_reshape_fail(skb, sch);

	sch->qstats.backlog += qdisc_pkt_len(skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now);
				 * calculate this time bonus and subtract it
				 * from delay.
				 */
				delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now;
				delay = max_t(psched_tdiff_t, 0, delay);
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.requeues++;
	}

	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (!len && q->qdisc && q->qdisc->ops->drop)
		len = q->qdisc->ops->drop(q->qdisc);
	if (len)
		sch->qstats.drops++;

	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* is it time to send this packet? */
		if (cb->time_to_send <= psched_get_time()) {
			__skb_unlink(skb, &sch->q);
			sch->qstats.backlog -= qdisc_pkt_len(skb);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}
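
/* Editorial summary of the logic above: when the head packet is not yet due,
 * qdisc_watchdog_schedule() arms an hrtimer for its time_to_send; the
 * watchdog clears the throttled state and reschedules the qdisc, so
 * netem_dequeue() runs again once the packet may be released.
 */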

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}
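
/* The table itself is built in user space: iproute2 ships normal, pareto
 * and paretonormal distribution files (generated by its maketable utility)
 * and passes the chosen one down as the TCA_NETEM_DELAY_DIST attribute.
 */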

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}
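
/* reciprocal_value() precomputes a multiplicative inverse of cell_size so
 * that the per-packet division in packet_len_2_sched_time() reduces to a
 * multiply and shift via reciprocal_divide().
 */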

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}
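
/* Compatibility detail: older tc binaries send only struct tc_netem_qopt,
 * while newer ones append netlink attributes after it; parse_attr() skips
 * the qopt header before handing the remainder to nla_parse().
 */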

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* For compatibility with earlier versions:
	 * if gap is set, reordering must be assumed 100% probable.
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
1009