xref: /openbmc/linux/net/sched/sch_netem.c (revision a13d3104)
/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines on top of it.  It does not need to
	 do bandwidth control either, since that can be handled by
	 using token bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
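
/* Illustrative usage (not part of the original file): the qdisc is
 * normally configured from user space with iproute2's tc. A minimal
 * sketch, assuming eth0 as the egress device:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *	tc qdisc change dev eth0 root netem loss 0.3% 25%
 *
 * The first command adds 100ms of delay with +/-10ms of jitter and 25%
 * correlation between successive delays; the second adds 0.3% random
 * packet loss, each loss correlated 25% with the previous one.
 */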

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
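
/* Worked example (illustration only): the update above computes
 *
 *	answer = (value * (2^32 - rho') + last * rho') >> 32
 *
 * with rho' = rho + 1. With rho scaled so that rho/2^32 ~= 0.5
 * (state->rho ~= 0x80000000), each output is roughly the average of a
 * fresh random value and the previous output. rho = 0 degenerates to
 * plain net_random(); rho close to 2^32 makes successive values track
 * each other almost exactly.
 */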

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = 3;
			return true;
		} else if (clg->a1 + clg->a4 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}
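
/* Illustration (not from the original source): the transition
 * probabilities p13, p31, p32, p14 and p23 arrive from user space as
 * u32 fixed-point fractions of 2^32, so a probability of 1% is encoded
 * as 0.01 * 2^32 ~= 42949673 and rnd = net_random() can be compared
 * directly against the cumulative thresholds above.
 */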

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases  (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (net_random() > clg->a3)
			return true;
	}

	return false;
}
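
/* Illustration (not from the original source): in the Gilbert-Elliot
 * mapping above, a1 = p (good->bad transition probability), a2 = r
 * (bad->good), a4 = 1-k (loss probability in the good state) and
 * a3 = h (transmission success probability in the bad state), all
 * scaled as u32 fractions of 2^32. Setting 1-k = 0 and h = 0 yields
 * the Simple Gilbert case where every packet in the bad state is lost.
 */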

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for GI model)
		 * Extracts a value from the Markov 4-state loss generator,
		 * which decides whether the next packet is transmitted
		 * or lost.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm
		 * Extracts a value from the Gilbert-Elliot loss generator,
		 * which decides whether the next packet is transmitted
		 * or lost.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
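
/* Worked example (illustration only): the two-part return value is just
 * mu + t * sigma / NETEM_DIST_SCALE computed without overflow, rounded
 * to nearest. With NETEM_DIST_SCALE = 8192, sigma = 10000 ticks and a
 * table entry t = 4096 (about +0.5 standard deviations):
 *
 *	(sigma / 8192) * t                  = 1 * 4096 = 4096
 *	((sigma % 8192) * t + 4096) / 8192  = (1808 * 4096 + 4096) / 8192
 *	                                    = 904
 *
 * giving mu + 5000 ticks, i.e. mu + 0.5 * sigma as expected.
 */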

static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}
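
/* Worked example (illustration only): q->rate is in bytes per second,
 * so a 1500 byte packet at rate = 125000 (1 Mbit/s) occupies the link
 * for 1500 * 10^9 / 125000 = 12,000,000 ns = 12 ms. With cell_size = 53
 * (ATM-like framing) the same packet would first be rounded up to 29
 * cells before the division.
 */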

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb = skb_peek_tail(list);

	/* Optimize for add at tail */
	if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
		return __skb_queue_tail(list, nskb);

	skb_queue_reverse_walk(list, skb) {
		if (tnext >= netem_skb_cb(skb)->time_to_send)
			break;
	}

	__skb_queue_after(list, skb, nskb);
}
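
/* Illustration (not from the original source): the reverse walk above
 * keeps the tfifo sorted by time_to_send. Enqueuing timestamps
 * 10, 30, 20 yields:
 *
 *	enqueue(10) -> [10]		(tail insert, fast path)
 *	enqueue(30) -> [10, 30]		(tail insert, fast path)
 *	enqueue(20) -> [10, 20, 30]	(reverse walk, insert after 10)
 *
 * so dequeue can always peek the head for the next due packet.
 */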

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			sch->qstats.drops++; /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 * Ideally, this orphaning should be done after the rate limiting
	 * module, because this breaks TCP Small Queue, and other mechanisms
	 * based on socket sk_wmem_alloc.
	 */
	if (q->latency || q->jitter)
		skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb)))
			return qdisc_drop(skb, sch);

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
		return qdisc_reshape_fail(skb, sch);

	sch->qstats.backlog += qdisc_pkt_len(skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now;
				delay = max_t(psched_tdiff_t, 0, delay);
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}

			delay += packet_len_2_sched_time(skb->len, q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.requeues++;
	}

	return NET_XMIT_SUCCESS;
}
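
/* Illustration (not from the original source): with gap = 5 and
 * reorder = 25% configured, q->counter lets the first four packets of
 * each gap take the delayed tfifo path unconditionally; the fifth is
 * then sent immediately (queued at the head) with probability 25%,
 * which makes it overtake the packets still waiting out their delay.
 */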

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (!len && q->qdisc && q->qdisc->ops->drop)
		len = q->qdisc->ops->drop(q->qdisc);
	if (len)
		sch->qstats.drops++;

	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* is the packet's send time already due? */
		if (cb->time_to_send <= psched_get_time()) {
			__skb_unlink(skb, &sch->q);
			sch->qstats.backlog -= qdisc_pkt_len(skb);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}
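
/* Illustration (not from the original source): the dequeue path is a
 * two-stage pipeline. Packets mature in the time-sorted tfifo; once a
 * packet's time_to_send is due it is either delivered directly or, if
 * a child qdisc was grafted, pushed into that child and dequeued from
 * there, so the child shapes traffic *after* the emulated delay.
 */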

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}
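
/* Illustration (not from the original source): the table entries are
 * inverse-CDF samples in units of NETEM_DIST_SCALE, typically built by
 * iproute2's maketable utility from measured delay data. An entry of
 * 8192 means "one standard deviation above the mean", so tabledist()
 * picks entries uniformly and maps them to mu + t * sigma / 8192.
 */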

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}
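
/* Illustration (not from the original source): TCA_NETEM_LOSS is a
 * nested attribute, so a 4-state configuration travels as
 *
 *	TCA_NETEM_LOSS
 *	  NETEM_LOSS_GI { p13, p31, p32, p14, p23 }
 *
 * with exactly one of NETEM_LOSS_GI or NETEM_LOSS_GE present in
 * practice; the loop above simply applies whichever model it finds.
 */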

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");