xref: /openbmc/linux/net/sched/sch_netem.c (revision 250a65f7)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7798b6b19SStephen Hemminger  * 		2 of the License.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/module.h>
175a0e3ad6STejun Heo #include <linux/slab.h>
181da177e4SLinus Torvalds #include <linux/types.h>
191da177e4SLinus Torvalds #include <linux/kernel.h>
201da177e4SLinus Torvalds #include <linux/errno.h>
211da177e4SLinus Torvalds #include <linux/skbuff.h>
221da177e4SLinus Torvalds #include <linux/rtnetlink.h>
231da177e4SLinus Torvalds 
24dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
251da177e4SLinus Torvalds #include <net/pkt_sched.h>
261da177e4SLinus Torvalds 
27*250a65f7Sstephen hemminger #define VERSION "1.3"
28eb229c4cSStephen Hemminger 
291da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
301da177e4SLinus Torvalds 	====================================
311da177e4SLinus Torvalds 
321da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
331da177e4SLinus Torvalds 		 Network Emulation Tool
341da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
351da177e4SLinus Torvalds 
361da177e4SLinus Torvalds 	 ----------------------------------------------------------------
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
391da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
401da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
411da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
421da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
431da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
441da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
451da177e4SLinus Torvalds 
461da177e4SLinus Torvalds 	 This qdisc does not do classification that can be handled in
471da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
481da177e4SLinus Torvalds 	 control either since that can be handled by using token
491da177e4SLinus Torvalds 	 bucket or other rate control.
50661b7972Sstephen hemminger 
51661b7972Sstephen hemminger      Correlated Loss Generator models
52661b7972Sstephen hemminger 
53661b7972Sstephen hemminger 	Added generation of correlated loss according to the
54661b7972Sstephen hemminger 	"Gilbert-Elliot" model, a 4-state markov model.
55661b7972Sstephen hemminger 
56661b7972Sstephen hemminger 	References:
57661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
58661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
59661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
60661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
61661b7972Sstephen hemminger 
62661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it
63661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
641da177e4SLinus Torvalds */
651da177e4SLinus Torvalds 
661da177e4SLinus Torvalds struct netem_sched_data {
671da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
6859cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
691da177e4SLinus Torvalds 
70b407621cSStephen Hemminger 	psched_tdiff_t latency;
71b407621cSStephen Hemminger 	psched_tdiff_t jitter;
72b407621cSStephen Hemminger 
731da177e4SLinus Torvalds 	u32 loss;
741da177e4SLinus Torvalds 	u32 limit;
751da177e4SLinus Torvalds 	u32 counter;
761da177e4SLinus Torvalds 	u32 gap;
771da177e4SLinus Torvalds 	u32 duplicate;
780dca51d3SStephen Hemminger 	u32 reorder;
79c865e5d9SStephen Hemminger 	u32 corrupt;
801da177e4SLinus Torvalds 
811da177e4SLinus Torvalds 	struct crndstate {
82b407621cSStephen Hemminger 		u32 last;
83b407621cSStephen Hemminger 		u32 rho;
84c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
851da177e4SLinus Torvalds 
861da177e4SLinus Torvalds 	struct disttable {
871da177e4SLinus Torvalds 		u32  size;
881da177e4SLinus Torvalds 		s16 table[0];
891da177e4SLinus Torvalds 	} *delay_dist;
90661b7972Sstephen hemminger 
91661b7972Sstephen hemminger 	enum  {
92661b7972Sstephen hemminger 		CLG_RANDOM,
93661b7972Sstephen hemminger 		CLG_4_STATES,
94661b7972Sstephen hemminger 		CLG_GILB_ELL,
95661b7972Sstephen hemminger 	} loss_model;
96661b7972Sstephen hemminger 
97661b7972Sstephen hemminger 	/* Correlated Loss Generation models */
98661b7972Sstephen hemminger 	struct clgstate {
99661b7972Sstephen hemminger 		/* state of the Markov chain */
100661b7972Sstephen hemminger 		u8 state;
101661b7972Sstephen hemminger 
102661b7972Sstephen hemminger 		/* 4-states and Gilbert-Elliot models */
103661b7972Sstephen hemminger 		u32 a1;	/* p13 for 4-states or p for GE */
104661b7972Sstephen hemminger 		u32 a2;	/* p31 for 4-states or r for GE */
105661b7972Sstephen hemminger 		u32 a3;	/* p32 for 4-states or h for GE */
106661b7972Sstephen hemminger 		u32 a4;	/* p14 for 4-states or 1-k for GE */
107661b7972Sstephen hemminger 		u32 a5; /* p23 used only in 4-states */
108661b7972Sstephen hemminger 	} clg;
109661b7972Sstephen hemminger 
1101da177e4SLinus Torvalds };
1111da177e4SLinus Torvalds 
/* Time stamp put into socket buffer control block
 * (lives in the private area of qdisc_skb_cb, see netem_skb_cb()).
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;	/* absolute time the skb may leave */
};
1161da177e4SLinus Torvalds 
1175f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
1185f86173bSJussi Kivilinna {
119175f9c1bSJussi Kivilinna 	BUILD_BUG_ON(sizeof(skb->cb) <
120175f9c1bSJussi Kivilinna 		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
121175f9c1bSJussi Kivilinna 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
1225f86173bSJussi Kivilinna }
1235f86173bSJussi Kivilinna 
1241da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1251da177e4SLinus Torvalds  * Use entropy source for initial seed.
1261da177e4SLinus Torvalds  */
1271da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1281da177e4SLinus Torvalds {
1291da177e4SLinus Torvalds 	state->rho = rho;
1301da177e4SLinus Torvalds 	state->last = net_random();
1311da177e4SLinus Torvalds }
1321da177e4SLinus Torvalds 
1331da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1341da177e4SLinus Torvalds  * Next number depends on last value.
1351da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1361da177e4SLinus Torvalds  */
137b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1381da177e4SLinus Torvalds {
1391da177e4SLinus Torvalds 	u64 value, rho;
1401da177e4SLinus Torvalds 	unsigned long answer;
1411da177e4SLinus Torvalds 
142bb2f8cc0SStephen Hemminger 	if (state->rho == 0)	/* no correlation */
1431da177e4SLinus Torvalds 		return net_random();
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds 	value = net_random();
1461da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1471da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1481da177e4SLinus Torvalds 	state->last = answer;
1491da177e4SLinus Torvalds 	return answer;
1501da177e4SLinus Torvalds }
1511da177e4SLinus Torvalds 
152661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
153661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
154661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
155661b7972Sstephen hemminger  */
156661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
157661b7972Sstephen hemminger {
158661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
159661b7972Sstephen hemminger 	u32 rnd = net_random();
160661b7972Sstephen hemminger 
161661b7972Sstephen hemminger 	/*
162661b7972Sstephen hemminger 	 * Makes a comparision between rnd and the transition
163661b7972Sstephen hemminger 	 * probabilities outgoing from the current state, then decides the
164661b7972Sstephen hemminger 	 * next state and if the next packet has to be transmitted or lost.
165661b7972Sstephen hemminger 	 * The four states correspond to:
166661b7972Sstephen hemminger 	 *   1 => successfully transmitted packets within a gap period
167661b7972Sstephen hemminger 	 *   4 => isolated losses within a gap period
168661b7972Sstephen hemminger 	 *   3 => lost packets within a burst period
169661b7972Sstephen hemminger 	 *   2 => successfully transmitted packets within a burst period
170661b7972Sstephen hemminger 	 */
171661b7972Sstephen hemminger 	switch (clg->state) {
172661b7972Sstephen hemminger 	case 1:
173661b7972Sstephen hemminger 		if (rnd < clg->a4) {
174661b7972Sstephen hemminger 			clg->state = 4;
175661b7972Sstephen hemminger 			return true;
176661b7972Sstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1) {
177661b7972Sstephen hemminger 			clg->state = 3;
178661b7972Sstephen hemminger 			return true;
179661b7972Sstephen hemminger 		} else if (clg->a1 < rnd)
180661b7972Sstephen hemminger 			clg->state = 1;
181661b7972Sstephen hemminger 
182661b7972Sstephen hemminger 		break;
183661b7972Sstephen hemminger 	case 2:
184661b7972Sstephen hemminger 		if (rnd < clg->a5) {
185661b7972Sstephen hemminger 			clg->state = 3;
186661b7972Sstephen hemminger 			return true;
187661b7972Sstephen hemminger 		} else
188661b7972Sstephen hemminger 			clg->state = 2;
189661b7972Sstephen hemminger 
190661b7972Sstephen hemminger 		break;
191661b7972Sstephen hemminger 	case 3:
192661b7972Sstephen hemminger 		if (rnd < clg->a3)
193661b7972Sstephen hemminger 			clg->state = 2;
194661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
195661b7972Sstephen hemminger 			clg->state = 1;
196661b7972Sstephen hemminger 			return true;
197661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
198661b7972Sstephen hemminger 			clg->state = 3;
199661b7972Sstephen hemminger 			return true;
200661b7972Sstephen hemminger 		}
201661b7972Sstephen hemminger 		break;
202661b7972Sstephen hemminger 	case 4:
203661b7972Sstephen hemminger 		clg->state = 1;
204661b7972Sstephen hemminger 		break;
205661b7972Sstephen hemminger 	}
206661b7972Sstephen hemminger 
207661b7972Sstephen hemminger 	return false;
208661b7972Sstephen hemminger }
209661b7972Sstephen hemminger 
210661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
211661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
212661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
213661b7972Sstephen hemminger  *
214661b7972Sstephen hemminger  * Makes a comparision between random number and the transition
215661b7972Sstephen hemminger  * probabilities outgoing from the current state, then decides the
216661b7972Sstephen hemminger  * next state. A second random number is extracted and the comparision
217661b7972Sstephen hemminger  * with the loss probability of the current state decides if the next
218661b7972Sstephen hemminger  * packet will be transmitted or lost.
219661b7972Sstephen hemminger  */
220661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
221661b7972Sstephen hemminger {
222661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
223661b7972Sstephen hemminger 
224661b7972Sstephen hemminger 	switch (clg->state) {
225661b7972Sstephen hemminger 	case 1:
226661b7972Sstephen hemminger 		if (net_random() < clg->a1)
227661b7972Sstephen hemminger 			clg->state = 2;
228661b7972Sstephen hemminger 		if (net_random() < clg->a4)
229661b7972Sstephen hemminger 			return true;
230661b7972Sstephen hemminger 	case 2:
231661b7972Sstephen hemminger 		if (net_random() < clg->a2)
232661b7972Sstephen hemminger 			clg->state = 1;
233661b7972Sstephen hemminger 		if (clg->a3 > net_random())
234661b7972Sstephen hemminger 			return true;
235661b7972Sstephen hemminger 	}
236661b7972Sstephen hemminger 
237661b7972Sstephen hemminger 	return false;
238661b7972Sstephen hemminger }
239661b7972Sstephen hemminger 
240661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
241661b7972Sstephen hemminger {
242661b7972Sstephen hemminger 	switch (q->loss_model) {
243661b7972Sstephen hemminger 	case CLG_RANDOM:
244661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
245661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
246661b7972Sstephen hemminger 
247661b7972Sstephen hemminger 	case CLG_4_STATES:
248661b7972Sstephen hemminger 		/* 4state loss model algorithm (used also for GI model)
249661b7972Sstephen hemminger 		* Extracts a value from the markov 4 state loss generator,
250661b7972Sstephen hemminger 		* if it is 1 drops a packet and if needed writes the event in
251661b7972Sstephen hemminger 		* the kernel logs
252661b7972Sstephen hemminger 		*/
253661b7972Sstephen hemminger 		return loss_4state(q);
254661b7972Sstephen hemminger 
255661b7972Sstephen hemminger 	case CLG_GILB_ELL:
256661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm
257661b7972Sstephen hemminger 		* Extracts a value from the Gilbert-Elliot loss generator,
258661b7972Sstephen hemminger 		* if it is 1 drops a packet and if needed writes the event in
259661b7972Sstephen hemminger 		* the kernel logs
260661b7972Sstephen hemminger 		*/
261661b7972Sstephen hemminger 		return loss_gilb_ell(q);
262661b7972Sstephen hemminger 	}
263661b7972Sstephen hemminger 
264661b7972Sstephen hemminger 	return false;	/* not reached */
265661b7972Sstephen hemminger }
266661b7972Sstephen hemminger 
267661b7972Sstephen hemminger 
/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	/* no jitter configured: the delay is simply the mean */
	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	/* table entries are fixed-point, scaled by NETEM_DIST_SCALE;
	 * sigma is split into quotient and remainder so the scaled part
	 * can be rounded to nearest before the division below
	 */
	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
2981da177e4SLinus Torvalds 
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;	/* copies to queue: 0 = drop, 2 = duplicate */

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	if (count == 0) {
		/* drop is accounted here, so tell the caller not to
		 * count it again (BYPASS), but report success so the
		 * parent does not adjust its queue length
		 */
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		/* flip one random bit somewhere in the linear data area */
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();
		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}
3991da177e4SLinus Torvalds 
4001da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc *sch)
4011da177e4SLinus Torvalds {
4021da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
4036d037a26SPatrick McHardy 	unsigned int len = 0;
4041da177e4SLinus Torvalds 
4056d037a26SPatrick McHardy 	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
4061da177e4SLinus Torvalds 		sch->q.qlen--;
4071da177e4SLinus Torvalds 		sch->qstats.drops++;
4081da177e4SLinus Torvalds 	}
4091da177e4SLinus Torvalds 	return len;
4101da177e4SLinus Torvalds }
4111da177e4SLinus Torvalds 
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	/* watchdog timer still pending: nothing is due yet */
	if (qdisc_is_throttled(sch))
		return NULL;

	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* if more time remaining? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			sch->q.qlen--;
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		/* head packet not due: arm the watchdog for its send time */
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}
4511da177e4SLinus Torvalds 
/* netem_reset - flush all queued packets and cancel the delay timer. */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}
4601da177e4SLinus Torvalds 
/* dist_free - release a distribution table allocated by get_dist_table().
 * The table may have come from either kmalloc or vmalloc.
 */
static void dist_free(struct disttable *d)
{
	if (!d)
		return;

	if (is_vmalloc_addr(d))
		vfree(d);
	else
		kfree(d);
}
4706373a9a2Sstephen hemminger 
4711da177e4SLinus Torvalds /*
4721da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
4731da177e4SLinus Torvalds  * signed 16 bit values.
4741da177e4SLinus Torvalds  */
4751e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
4761da177e4SLinus Torvalds {
4771da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
4786373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
4791e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
4807698b4fcSDavid S. Miller 	spinlock_t *root_lock;
4811da177e4SLinus Torvalds 	struct disttable *d;
4821da177e4SLinus Torvalds 	int i;
4836373a9a2Sstephen hemminger 	size_t s;
4841da177e4SLinus Torvalds 
485df173bdaSstephen hemminger 	if (n > NETEM_DIST_MAX)
4861da177e4SLinus Torvalds 		return -EINVAL;
4871da177e4SLinus Torvalds 
4886373a9a2Sstephen hemminger 	s = sizeof(struct disttable) + n * sizeof(s16);
4896373a9a2Sstephen hemminger 	d = kmalloc(s, GFP_KERNEL);
4906373a9a2Sstephen hemminger 	if (!d)
4916373a9a2Sstephen hemminger 		d = vmalloc(s);
4921da177e4SLinus Torvalds 	if (!d)
4931da177e4SLinus Torvalds 		return -ENOMEM;
4941da177e4SLinus Torvalds 
4951da177e4SLinus Torvalds 	d->size = n;
4961da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
4971da177e4SLinus Torvalds 		d->table[i] = data[i];
4981da177e4SLinus Torvalds 
499102396aeSJarek Poplawski 	root_lock = qdisc_root_sleeping_lock(sch);
5007698b4fcSDavid S. Miller 
5017698b4fcSDavid S. Miller 	spin_lock_bh(root_lock);
5026373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
503b94c8afcSPatrick McHardy 	q->delay_dist = d;
5047698b4fcSDavid S. Miller 	spin_unlock_bh(root_lock);
5051da177e4SLinus Torvalds 	return 0;
5061da177e4SLinus Torvalds }
5071da177e4SLinus Torvalds 
508265eb67fSStephen Hemminger static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
5091da177e4SLinus Torvalds {
5101da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5111e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
5141da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
5151da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
5161da177e4SLinus Torvalds }
5171da177e4SLinus Torvalds 
518265eb67fSStephen Hemminger static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
5190dca51d3SStephen Hemminger {
5200dca51d3SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
5211e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
5220dca51d3SStephen Hemminger 
5230dca51d3SStephen Hemminger 	q->reorder = r->probability;
5240dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
5250dca51d3SStephen Hemminger }
5260dca51d3SStephen Hemminger 
527265eb67fSStephen Hemminger static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
528c865e5d9SStephen Hemminger {
529c865e5d9SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
5301e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
531c865e5d9SStephen Hemminger 
532c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
533c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
534c865e5d9SStephen Hemminger }
535c865e5d9SStephen Hemminger 
536661b7972Sstephen hemminger static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
537661b7972Sstephen hemminger {
538661b7972Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
539661b7972Sstephen hemminger 	const struct nlattr *la;
540661b7972Sstephen hemminger 	int rem;
541661b7972Sstephen hemminger 
542661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
543661b7972Sstephen hemminger 		u16 type = nla_type(la);
544661b7972Sstephen hemminger 
545661b7972Sstephen hemminger 		switch(type) {
546661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
547661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
548661b7972Sstephen hemminger 
549661b7972Sstephen hemminger 			if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
550661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
551661b7972Sstephen hemminger 				return -EINVAL;
552661b7972Sstephen hemminger 			}
553661b7972Sstephen hemminger 
554661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
555661b7972Sstephen hemminger 
556661b7972Sstephen hemminger 			q->clg.state = 1;
557661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
558661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
559661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
560661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
561661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
562661b7972Sstephen hemminger 			break;
563661b7972Sstephen hemminger 		}
564661b7972Sstephen hemminger 
565661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
566661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
567661b7972Sstephen hemminger 
568661b7972Sstephen hemminger 			if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
569661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
570661b7972Sstephen hemminger 				return -EINVAL;
571661b7972Sstephen hemminger 			}
572661b7972Sstephen hemminger 
573661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
574661b7972Sstephen hemminger 			q->clg.state = 1;
575661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
576661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
577661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
578661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
579661b7972Sstephen hemminger 			break;
580661b7972Sstephen hemminger 		}
581661b7972Sstephen hemminger 
582661b7972Sstephen hemminger 		default:
583661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
584661b7972Sstephen hemminger 			return -EINVAL;
585661b7972Sstephen hemminger 		}
586661b7972Sstephen hemminger 	}
587661b7972Sstephen hemminger 
588661b7972Sstephen hemminger 	return 0;
589661b7972Sstephen hemminger }
590661b7972Sstephen hemminger 
/* Expected size/type of each top-level netem netlink attribute */
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};
59727a3421eSPatrick McHardy 
/* parse_attr - parse attributes that follow a fixed-size header.
 * @nla starts with a struct of size @len (struct tc_netem_qopt here);
 * anything after the aligned header is parsed as nested attributes
 * against @policy.  If only the header is present, @tb is zeroed.
 */
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	/* header only, no nested attributes: return an empty table */
	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}
6152c10b32bSThomas Graf 
616c865e5d9SStephen Hemminger /* Parse netlink message to set options */
/* netem_change - apply a TCA_OPTIONS message to a netem qdisc.
 * Returns 0 on success or a negative errno.
 * NOTE(review): settings applied before a later failure (e.g. the inner
 * fifo limit before get_dist_table() fails) are not rolled back, so a
 * failed change can leave a partially-updated configuration — appears
 * to be long-standing behavior; confirm before relying on atomicity.
 */
6171e90474cSPatrick McHardy static int netem_change(struct Qdisc *sch, struct nlattr *opt)
6181da177e4SLinus Torvalds {
6191da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
620b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
6211da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
6221da177e4SLinus Torvalds 	int ret;
6231da177e4SLinus Torvalds 
624b03f4672SPatrick McHardy 	if (opt == NULL)
6251da177e4SLinus Torvalds 		return -EINVAL;
6261da177e4SLinus Torvalds 
	/* fixed tc_netem_qopt header precedes any nested attributes */
6272c10b32bSThomas Graf 	qopt = nla_data(opt);
6282c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
629b03f4672SPatrick McHardy 	if (ret < 0)
630b03f4672SPatrick McHardy 		return ret;
631b03f4672SPatrick McHardy 
	/* propagate the packet limit to the inner tfifo child qdisc */
632fb0305ceSPatrick McHardy 	ret = fifo_set_limit(q->qdisc, qopt->limit);
6331da177e4SLinus Torvalds 	if (ret) {
634*250a65f7Sstephen hemminger 		pr_info("netem: can't set fifo limit\n");
6351da177e4SLinus Torvalds 		return ret;
6361da177e4SLinus Torvalds 	}
6371da177e4SLinus Torvalds 
6381da177e4SLinus Torvalds 	q->latency = qopt->latency;
6391da177e4SLinus Torvalds 	q->jitter = qopt->jitter;
6401da177e4SLinus Torvalds 	q->limit = qopt->limit;
6411da177e4SLinus Torvalds 	q->gap = qopt->gap;
	/* restart the reorder-gap packet counter on every reconfiguration */
6420dca51d3SStephen Hemminger 	q->counter = 0;
6431da177e4SLinus Torvalds 	q->loss = qopt->loss;
6441da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
6451da177e4SLinus Torvalds 
646bb2f8cc0SStephen Hemminger 	/* for compatibility with earlier versions.
647bb2f8cc0SStephen Hemminger 	 * if gap is set, need to assume 100% probability
6480dca51d3SStephen Hemminger 	 */
649a362e0a7SStephen Hemminger 	if (q->gap)
6500dca51d3SStephen Hemminger 		q->reorder = ~0;
6510dca51d3SStephen Hemminger 
652265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
653265eb67fSStephen Hemminger 		get_correlation(sch, tb[TCA_NETEM_CORR]);
6541da177e4SLinus Torvalds 
6551e90474cSPatrick McHardy 	if (tb[TCA_NETEM_DELAY_DIST]) {
6561e90474cSPatrick McHardy 		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
6571da177e4SLinus Torvalds 		if (ret)
6581da177e4SLinus Torvalds 			return ret;
6591da177e4SLinus Torvalds 	}
660c865e5d9SStephen Hemminger 
661265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
662265eb67fSStephen Hemminger 		get_reorder(sch, tb[TCA_NETEM_REORDER]);
6631da177e4SLinus Torvalds 
664265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
665265eb67fSStephen Hemminger 		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
6661da177e4SLinus Torvalds 
	/* default to the legacy Bernoulli loss model unless an explicit
	 * loss-model attribute overrides it below
	 */
667661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
668661b7972Sstephen hemminger 	if (tb[TCA_NETEM_LOSS])
669661b7972Sstephen hemminger 		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
670661b7972Sstephen hemminger 
671661b7972Sstephen hemminger 	return ret;
6721da177e4SLinus Torvalds }
6731da177e4SLinus Torvalds 
674300ce174SStephen Hemminger /*
675300ce174SStephen Hemminger  * Special case version of FIFO queue for use by netem.
676300ce174SStephen Hemminger  * It queues in order based on timestamps in skb's
677300ce174SStephen Hemminger  */
678300ce174SStephen Hemminger struct fifo_sched_data {
	/* maximum queue length, in packets */
679300ce174SStephen Hemminger 	u32 limit;
	/* largest time_to_send accepted at the tail; lets tfifo_enqueue()
	 * take the fast append path without walking the queue
	 */
680075aa573SStephen Hemminger 	psched_time_t oldest;
681300ce174SStephen Hemminger };
682300ce174SStephen Hemminger 
/* tfifo_enqueue - insert @nskb keeping the queue sorted by time_to_send.
 * Fast path: appends at the tail when the new timestamp is not earlier
 * than anything already queued.  Otherwise walks backwards from the tail
 * to find the last skb whose send time is <= the new one and inserts
 * after it, so equal timestamps preserve arrival order.
 * Returns NET_XMIT_SUCCESS, or the drop result when over @limit.
 */
683300ce174SStephen Hemminger static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
684300ce174SStephen Hemminger {
685300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
686300ce174SStephen Hemminger 	struct sk_buff_head *list = &sch->q;
6875f86173bSJussi Kivilinna 	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
688300ce174SStephen Hemminger 	struct sk_buff *skb;
689300ce174SStephen Hemminger 
690300ce174SStephen Hemminger 	if (likely(skb_queue_len(list) < q->limit)) {
691075aa573SStephen Hemminger 		/* Optimize for add at tail */
692104e0878SPatrick McHardy 		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
693075aa573SStephen Hemminger 			q->oldest = tnext;
694075aa573SStephen Hemminger 			return qdisc_enqueue_tail(nskb, sch);
695075aa573SStephen Hemminger 		}
696075aa573SStephen Hemminger 
		/* walk tail->head; stop at the first skb not later than us */
697300ce174SStephen Hemminger 		skb_queue_reverse_walk(list, skb) {
6985f86173bSJussi Kivilinna 			const struct netem_skb_cb *cb = netem_skb_cb(skb);
699300ce174SStephen Hemminger 
700104e0878SPatrick McHardy 			if (tnext >= cb->time_to_send)
701300ce174SStephen Hemminger 				break;
702300ce174SStephen Hemminger 		}
703300ce174SStephen Hemminger 
704300ce174SStephen Hemminger 		__skb_queue_after(list, skb, nskb);
705300ce174SStephen Hemminger 
		/* __skb_queue_after() bypasses qdisc accounting, so the
		 * backlog byte count must be updated by hand here
		 */
7060abf77e5SJussi Kivilinna 		sch->qstats.backlog += qdisc_pkt_len(nskb);
707300ce174SStephen Hemminger 
708300ce174SStephen Hemminger 		return NET_XMIT_SUCCESS;
709300ce174SStephen Hemminger 	}
710300ce174SStephen Hemminger 
711075aa573SStephen Hemminger 	return qdisc_reshape_fail(nskb, sch);
712300ce174SStephen Hemminger }
713300ce174SStephen Hemminger 
7141e90474cSPatrick McHardy static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
715300ce174SStephen Hemminger {
716300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
717300ce174SStephen Hemminger 
718300ce174SStephen Hemminger 	if (opt) {
7191e90474cSPatrick McHardy 		struct tc_fifo_qopt *ctl = nla_data(opt);
7201e90474cSPatrick McHardy 		if (nla_len(opt) < sizeof(*ctl))
721300ce174SStephen Hemminger 			return -EINVAL;
722300ce174SStephen Hemminger 
723300ce174SStephen Hemminger 		q->limit = ctl->limit;
724300ce174SStephen Hemminger 	} else
7255ce2d488SDavid S. Miller 		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
726300ce174SStephen Hemminger 
727a084980dSPatrick McHardy 	q->oldest = PSCHED_PASTPERFECT;
728300ce174SStephen Hemminger 	return 0;
729300ce174SStephen Hemminger }
730300ce174SStephen Hemminger 
/* tfifo_dump - report the tfifo configuration (just the packet limit).
 * Returns skb->len on success, -1 when the attribute does not fit
 * (NLA_PUT expands to a goto nla_put_failure on overflow).
 */
731300ce174SStephen Hemminger static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
732300ce174SStephen Hemminger {
733300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
734300ce174SStephen Hemminger 	struct tc_fifo_qopt opt = { .limit = q->limit };
735300ce174SStephen Hemminger 
7361e90474cSPatrick McHardy 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
737300ce174SStephen Hemminger 	return skb->len;
738300ce174SStephen Hemminger 
7391e90474cSPatrick McHardy nla_put_failure:
740300ce174SStephen Hemminger 	return -1;
741300ce174SStephen Hemminger }
742300ce174SStephen Hemminger 
/* Internal qdisc used as netem's child queue.  Only enqueue and init
 * are special (time-ordered insert, limit handling); all other hooks
 * reuse the generic qdisc helpers.  Not registered globally — it is
 * instantiated directly by netem_init().
 */
74320fea08bSEric Dumazet static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
744300ce174SStephen Hemminger 	.id		=	"tfifo",
745300ce174SStephen Hemminger 	.priv_size	=	sizeof(struct fifo_sched_data),
746300ce174SStephen Hemminger 	.enqueue	=	tfifo_enqueue,
747300ce174SStephen Hemminger 	.dequeue	=	qdisc_dequeue_head,
7488e3af978SJarek Poplawski 	.peek		=	qdisc_peek_head,
749300ce174SStephen Hemminger 	.drop		=	qdisc_queue_drop,
750300ce174SStephen Hemminger 	.init		=	tfifo_init,
751300ce174SStephen Hemminger 	.reset		=	qdisc_reset_queue,
752300ce174SStephen Hemminger 	.change		=	tfifo_init,
753300ce174SStephen Hemminger 	.dump		=	tfifo_dump,
754300ce174SStephen Hemminger };
755300ce174SStephen Hemminger 
/* netem_init - create a netem qdisc: set up the delay watchdog, create
 * the inner tfifo child, then apply the initial options via
 * netem_change().  Options are mandatory (-EINVAL without them).
 * On a failed change the child qdisc is destroyed before returning
 * the error.  Returns 0, -EINVAL, -ENOMEM or netem_change()'s errno.
 */
7561e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt)
7571da177e4SLinus Torvalds {
7581da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7591da177e4SLinus Torvalds 	int ret;
7601da177e4SLinus Torvalds 
7611da177e4SLinus Torvalds 	if (!opt)
7621da177e4SLinus Torvalds 		return -EINVAL;
7631da177e4SLinus Torvalds 
76459cb5c67SPatrick McHardy 	qdisc_watchdog_init(&q->watchdog, sch);
7651da177e4SLinus Torvalds 
766661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
	/* child gets minor 1 of our handle; see netem_dump_class() */
7673511c913SChangli Gao 	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
7689f9afec4SPatrick McHardy 				     TC_H_MAKE(sch->handle, 1));
7691da177e4SLinus Torvalds 	if (!q->qdisc) {
770*250a65f7Sstephen hemminger 		pr_notice("netem: qdisc create tfifo qdisc failed\n");
7711da177e4SLinus Torvalds 		return -ENOMEM;
7721da177e4SLinus Torvalds 	}
7731da177e4SLinus Torvalds 
7741da177e4SLinus Torvalds 	ret = netem_change(sch, opt);
7751da177e4SLinus Torvalds 	if (ret) {
776*250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
7771da177e4SLinus Torvalds 		qdisc_destroy(q->qdisc);
7781da177e4SLinus Torvalds 	}
7791da177e4SLinus Torvalds 	return ret;
7801da177e4SLinus Torvalds }
7811da177e4SLinus Torvalds 
/* netem_destroy - tear down: cancel the pending watchdog timer before
 * destroying the child qdisc, then release the delay distribution table.
 */
7821da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
7831da177e4SLinus Torvalds {
7841da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7851da177e4SLinus Torvalds 
78659cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
7871da177e4SLinus Torvalds 	qdisc_destroy(q->qdisc);
7886373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
7891da177e4SLinus Torvalds }
7901da177e4SLinus Torvalds 
/* dump_loss_model - emit the configured loss model as a TCA_NETEM_LOSS
 * nested attribute.  The legacy random model cancels the nest and emits
 * nothing; the 4-state and Gilbert-Elliott models repack the generic
 * clg.a1..a5 parameters into their uapi structs (mirroring the mapping
 * done on the parse side in get_loss_clg()).
 * Returns 0 on success, -1 on attribute overflow (NLA_PUT jumps to
 * nla_put_failure, which cancels the whole nest).
 */
791661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
792661b7972Sstephen hemminger 			   struct sk_buff *skb)
793661b7972Sstephen hemminger {
794661b7972Sstephen hemminger 	struct nlattr *nest;
795661b7972Sstephen hemminger 
796661b7972Sstephen hemminger 	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
797661b7972Sstephen hemminger 	if (nest == NULL)
798661b7972Sstephen hemminger 		goto nla_put_failure;
799661b7972Sstephen hemminger 
800661b7972Sstephen hemminger 	switch (q->loss_model) {
801661b7972Sstephen hemminger 	case CLG_RANDOM:
802661b7972Sstephen hemminger 		/* legacy loss model */
803661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
804661b7972Sstephen hemminger 		return 0;	/* no data */
805661b7972Sstephen hemminger 
806661b7972Sstephen hemminger 	case CLG_4_STATES: {
807661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
808661b7972Sstephen hemminger 			.p13 = q->clg.a1,
809661b7972Sstephen hemminger 			.p31 = q->clg.a2,
810661b7972Sstephen hemminger 			.p32 = q->clg.a3,
811661b7972Sstephen hemminger 			.p14 = q->clg.a4,
812661b7972Sstephen hemminger 			.p23 = q->clg.a5,
813661b7972Sstephen hemminger 		};
814661b7972Sstephen hemminger 
815661b7972Sstephen hemminger 		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
816661b7972Sstephen hemminger 		break;
817661b7972Sstephen hemminger 	}
818661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
819661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
820661b7972Sstephen hemminger 			.p = q->clg.a1,
821661b7972Sstephen hemminger 			.r = q->clg.a2,
822661b7972Sstephen hemminger 			.h = q->clg.a3,
823661b7972Sstephen hemminger 			.k1 = q->clg.a4,
824661b7972Sstephen hemminger 		};
825661b7972Sstephen hemminger 
826661b7972Sstephen hemminger 		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
827661b7972Sstephen hemminger 		break;
828661b7972Sstephen hemminger 	}
829661b7972Sstephen hemminger 	}
830661b7972Sstephen hemminger 
831661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
832661b7972Sstephen hemminger 	return 0;
833661b7972Sstephen hemminger 
834661b7972Sstephen hemminger nla_put_failure:
835661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
836661b7972Sstephen hemminger 	return -1;
837661b7972Sstephen hemminger }
838661b7972Sstephen hemminger 
/* netem_dump - serialize the full netem configuration for userspace.
 * Emits the base tc_netem_qopt as TCA_OPTIONS followed by the
 * correlation, reorder, corrupt and loss-model attributes.  The start
 * of the written region is remembered via the skb tail pointer so the
 * whole message can be trimmed away on any failure.
 * Returns the total attribute length, or -1 on overflow.
 */
8391da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
8401da177e4SLinus Torvalds {
8411da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
	/* remember where our attributes start, for nla_nest_end/nlmsg_trim */
842861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
8431da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
8441da177e4SLinus Torvalds 	struct tc_netem_corr cor;
8450dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
846c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
8471da177e4SLinus Torvalds 
8481da177e4SLinus Torvalds 	qopt.latency = q->latency;
8491da177e4SLinus Torvalds 	qopt.jitter = q->jitter;
8501da177e4SLinus Torvalds 	qopt.limit = q->limit;
8511da177e4SLinus Torvalds 	qopt.loss = q->loss;
8521da177e4SLinus Torvalds 	qopt.gap = q->gap;
8531da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
8541e90474cSPatrick McHardy 	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
8551da177e4SLinus Torvalds 
8561da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
8571da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
8581da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
8591e90474cSPatrick McHardy 	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
8600dca51d3SStephen Hemminger 
8610dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
8620dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
8631e90474cSPatrick McHardy 	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
8640dca51d3SStephen Hemminger 
865c865e5d9SStephen Hemminger 	corrupt.probability = q->corrupt;
866c865e5d9SStephen Hemminger 	corrupt.correlation = q->corrupt_cor.rho;
8671e90474cSPatrick McHardy 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
868c865e5d9SStephen Hemminger 
869661b7972Sstephen hemminger 	if (dump_loss_model(q, skb) != 0)
870661b7972Sstephen hemminger 		goto nla_put_failure;
871661b7972Sstephen hemminger 
872861d7f74Sstephen hemminger 	return nla_nest_end(skb, nla);
8731da177e4SLinus Torvalds 
8741e90474cSPatrick McHardy nla_put_failure:
	/* roll back everything this dump added to the skb */
875861d7f74Sstephen hemminger 	nlmsg_trim(skb, nla);
8761da177e4SLinus Torvalds 	return -1;
8771da177e4SLinus Torvalds }
8781da177e4SLinus Torvalds 
87910f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
88010f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
88110f6dfcfSstephen hemminger {
88210f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
88310f6dfcfSstephen hemminger 
88410f6dfcfSstephen hemminger 	if (cl != 1) 	/* only one class */
88510f6dfcfSstephen hemminger 		return -ENOENT;
88610f6dfcfSstephen hemminger 
88710f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
88810f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
88910f6dfcfSstephen hemminger 
89010f6dfcfSstephen hemminger 	return 0;
89110f6dfcfSstephen hemminger }
89210f6dfcfSstephen hemminger 
/* netem_graft - replace the child qdisc, returning the previous one in
 * @old.  NULL grafts the noop qdisc.  The swap, queue-length accounting
 * fix-up and reset of the old child all happen under the qdisc tree
 * lock so the datapath never sees a half-swapped child.
 * Always succeeds (returns 0).
 */
89310f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
89410f6dfcfSstephen hemminger 		     struct Qdisc **old)
89510f6dfcfSstephen hemminger {
89610f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
89710f6dfcfSstephen hemminger 
89810f6dfcfSstephen hemminger 	if (new == NULL)
89910f6dfcfSstephen hemminger 		new = &noop_qdisc;
90010f6dfcfSstephen hemminger 
90110f6dfcfSstephen hemminger 	sch_tree_lock(sch);
90210f6dfcfSstephen hemminger 	*old = q->qdisc;
90310f6dfcfSstephen hemminger 	q->qdisc = new;
	/* discount the old child's queued packets from ancestor qlen stats */
90410f6dfcfSstephen hemminger 	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
90510f6dfcfSstephen hemminger 	qdisc_reset(*old);
90610f6dfcfSstephen hemminger 	sch_tree_unlock(sch);
90710f6dfcfSstephen hemminger 
90810f6dfcfSstephen hemminger 	return 0;
90910f6dfcfSstephen hemminger }
91010f6dfcfSstephen hemminger 
91110f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
91210f6dfcfSstephen hemminger {
91310f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
91410f6dfcfSstephen hemminger 	return q->qdisc;
91510f6dfcfSstephen hemminger }
91610f6dfcfSstephen hemminger 
/* netem_get - class lookup; there is only one class, always id 1.
 * No reference counting is needed for this fixed pseudo-class.
 */
91710f6dfcfSstephen hemminger static unsigned long netem_get(struct Qdisc *sch, u32 classid)
91810f6dfcfSstephen hemminger {
91910f6dfcfSstephen hemminger 	return 1;
92010f6dfcfSstephen hemminger }
92110f6dfcfSstephen hemminger 
/* netem_put - release a class reference; nothing to do since netem_get()
 * takes no reference on the fixed pseudo-class.
 */
92210f6dfcfSstephen hemminger static void netem_put(struct Qdisc *sch, unsigned long arg)
92310f6dfcfSstephen hemminger {
92410f6dfcfSstephen hemminger }
92510f6dfcfSstephen hemminger 
92610f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
92710f6dfcfSstephen hemminger {
92810f6dfcfSstephen hemminger 	if (!walker->stop) {
92910f6dfcfSstephen hemminger 		if (walker->count >= walker->skip)
93010f6dfcfSstephen hemminger 			if (walker->fn(sch, 1, walker) < 0) {
93110f6dfcfSstephen hemminger 				walker->stop = 1;
93210f6dfcfSstephen hemminger 				return;
93310f6dfcfSstephen hemminger 			}
93410f6dfcfSstephen hemminger 		walker->count++;
93510f6dfcfSstephen hemminger 	}
93610f6dfcfSstephen hemminger }
93710f6dfcfSstephen hemminger 
/* Class operations for netem's single pseudo-class (the child qdisc).
 * No .change/.delete: the class set is fixed and cannot be modified.
 */
93810f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = {
93910f6dfcfSstephen hemminger 	.graft		=	netem_graft,
94010f6dfcfSstephen hemminger 	.leaf		=	netem_leaf,
94110f6dfcfSstephen hemminger 	.get		=	netem_get,
94210f6dfcfSstephen hemminger 	.put		=	netem_put,
94310f6dfcfSstephen hemminger 	.walk		=	netem_walk,
94410f6dfcfSstephen hemminger 	.dump		=	netem_dump_class,
94510f6dfcfSstephen hemminger };
94610f6dfcfSstephen hemminger 
/* Top-level qdisc operations registered under the "netem" id.
 * enqueue/dequeue/reset/drop are defined earlier in this file;
 * .peek reuses the generic dequeue-and-requeue helper.
 */
94720fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
9481da177e4SLinus Torvalds 	.id		=	"netem",
94910f6dfcfSstephen hemminger 	.cl_ops		=	&netem_class_ops,
9501da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
9511da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
9521da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
95377be155cSJarek Poplawski 	.peek		=	qdisc_peek_dequeued,
9541da177e4SLinus Torvalds 	.drop		=	netem_drop,
9551da177e4SLinus Torvalds 	.init		=	netem_init,
9561da177e4SLinus Torvalds 	.reset		=	netem_reset,
9571da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
9581da177e4SLinus Torvalds 	.change		=	netem_change,
9591da177e4SLinus Torvalds 	.dump		=	netem_dump,
9601da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
9611da177e4SLinus Torvalds };
9621da177e4SLinus Torvalds 
9631da177e4SLinus Torvalds 
/* Module load: announce the version and register the netem qdisc. */
9641da177e4SLinus Torvalds static int __init netem_module_init(void)
9651da177e4SLinus Torvalds {
966eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
9671da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
9681da177e4SLinus Torvalds }
/* Module unload: unregister the qdisc so no new instances can be made. */
9691da177e4SLinus Torvalds static void __exit netem_module_exit(void)
9701da177e4SLinus Torvalds {
9711da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
9721da177e4SLinus Torvalds }
/* Wire the load/unload hooks into the module infrastructure. */
9731da177e4SLinus Torvalds module_init(netem_module_init)
9741da177e4SLinus Torvalds module_exit(netem_module_exit)
9751da177e4SLinus Torvalds MODULE_LICENSE("GPL");
976