/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
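
/* Example usage (illustrative, not part of the original source): this
 * qdisc is normally configured from user space with tc, e.g.
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 * which delays every packet by 100ms with +/-10ms of jitter, each
 * jitter sample 25% correlated with the previous one; "eth0" is just
 * a placeholder device name.
 */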

struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
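
/* Illustrative note: netem's private per-packet state lives in the tail
 * of skb->cb, after the generic struct qdisc_skb_cb; the BUILD_BUG_ON
 * above verifies at compile time that both fit within the 48-byte
 * control block.
 */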

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
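
/* Worked example (illustrative): with state->rho = 0x40000000 (about a
 * quarter of 2^32), the blend above computes roughly
 *	answer = 0.75 * value + 0.25 * last
 * so each output mixes fresh entropy with the previous output in
 * proportion to the requested correlation.
 */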

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Compare rnd with the transition probabilities outgoing from
	 * the current state, then decide the next state and whether the
	 * next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert).
 *
 * Compares a random number with the transition probabilities outgoing
 * from the current state, then decides the next state. A second random
 * number is extracted, and comparing it with the loss probability of
 * the current state decides if the next packet will be transmitted
 * or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;	/* do not fall through into the bad-state checks */
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}

	return false;
}
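
/* Illustrative: these generators are selected from user space through
 * the nested TCA_NETEM_LOSS attribute, e.g. with tc syntax such as
 *
 *	tc qdisc add dev eth0 root netem loss state 1% 10% 1% 0.1%
 *	tc qdisc add dev eth0 root netem loss gemodel 1% 10% 70% 0.1%
 *
 * which populate clgstate through NETEM_LOSS_GI and NETEM_LOSS_GE
 * respectively (see get_loss_clg() below).
 */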

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model):
		 * extract a value from the Markov 4-state loss generator
		 * and drop the packet if a loss event is indicated.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * extract a value from the Gilbert-Elliot loss generator
		 * and drop the packet if a loss event is indicated.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
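
/* Note (illustrative): writing sigma = q * NETEM_DIST_SCALE + r, the
 * return statement above evaluates mu + t * sigma / NETEM_DIST_SCALE
 * as (r * t, rounded) / NETEM_DIST_SCALE + q * t + mu, which keeps the
 * intermediate products small enough to avoid overflow for large sigma.
 */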

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate the packet, then re-insert at top of the
	 * qdisc tree, since the parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();
		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* is the earliest packet due to be sent? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			sch->q.qlen--;
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}
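
/* Illustrative note: when the head packet is not yet due, the qdisc
 * watchdog above is armed for cb->time_to_send; it marks the qdisc
 * throttled and reschedules the transmit path when the timer fires,
 * which is what actually paces delayed packets.
 */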

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}
4611da177e4SLinus Torvalds 
4626373a9a2Sstephen hemminger static void dist_free(struct disttable *d)
4636373a9a2Sstephen hemminger {
4646373a9a2Sstephen hemminger 	if (d) {
4656373a9a2Sstephen hemminger 		if (is_vmalloc_addr(d))
4666373a9a2Sstephen hemminger 			vfree(d);
4676373a9a2Sstephen hemminger 		else
4686373a9a2Sstephen hemminger 			kfree(d);
4696373a9a2Sstephen hemminger 	}
4706373a9a2Sstephen hemminger }

/*
 * Distribution data is a variable-size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	dist_free(q->delay_dist);
	q->delay_dist = d;
	spin_unlock_bh(root_lock);
	return 0;
}
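
/* Illustrative: user space (tc) ships precomputed delay distribution
 * tables (e.g. normal.dist, pareto.dist) and loads one of them through
 * TCA_NETEM_DELAY_DIST; each entry is a signed 16-bit inverse-CDF
 * sample that tabledist() above scales by NETEM_DIST_SCALE.
 */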

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}
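
/* Illustrative note: the netem netlink message is a struct
 * tc_netem_qopt followed, optionally, by nested attributes, so
 * parse_attr() skips NLA_ALIGN(len) bytes of fixed header first; an
 * older tc that sends only the bare struct yields
 * nested_len < nla_attr_size(0) and an all-NULL tb[].
 */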

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_info("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* For compatibility with earlier versions:
	 * if gap is set, assume 100% reordering probability.
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues packets in order based on the timestamps in their skbs.
 */
struct fifo_sched_data {
	u32 limit;
	psched_time_t oldest;
};

static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);

		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}
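
/* Illustrative note: the reverse walk above stops at the first packet
 * whose time_to_send is <= tnext and inserts after it, so packets with
 * equal send times keep their arrival order (a stable insert that is
 * O(1) for the common append-at-tail case).
 */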

static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct fifo_sched_data *q = qdisc_priv(sch);

	if (opt) {
		struct tc_fifo_qopt *ctl = nla_data(opt);
		if (nla_len(opt) < sizeof(*ctl))
			return -EINVAL;

		q->limit = ctl->limit;
	} else
		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);

	q->oldest = PSCHED_PASTPERFECT;
	return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_notice("netem: tfifo qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_info("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");