xref: /openbmc/linux/net/sched/sch_netem.c (revision fc33cc72)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7798b6b19SStephen Hemminger  * 		2 of the License.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
16b7f080cfSAlexey Dobriyan #include <linux/mm.h>
171da177e4SLinus Torvalds #include <linux/module.h>
185a0e3ad6STejun Heo #include <linux/slab.h>
191da177e4SLinus Torvalds #include <linux/types.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/skbuff.h>
2378776d3fSDavid S. Miller #include <linux/vmalloc.h>
241da177e4SLinus Torvalds #include <linux/rtnetlink.h>
251da177e4SLinus Torvalds 
26dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
271da177e4SLinus Torvalds #include <net/pkt_sched.h>
281da177e4SLinus Torvalds 
29250a65f7Sstephen hemminger #define VERSION "1.3"
30eb229c4cSStephen Hemminger 
311da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
321da177e4SLinus Torvalds 	====================================
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
351da177e4SLinus Torvalds 		 Network Emulation Tool
361da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds 	 ----------------------------------------------------------------
391da177e4SLinus Torvalds 
401da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
411da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
421da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
431da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
441da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
451da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
461da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds 	 This qdisc does not do classification that can be handled in
491da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
501da177e4SLinus Torvalds 	 control either since that can be handled by using token
511da177e4SLinus Torvalds 	 bucket or other rate control.
52661b7972Sstephen hemminger 
53661b7972Sstephen hemminger      Correlated Loss Generator models
54661b7972Sstephen hemminger 
55661b7972Sstephen hemminger 	Added generation of correlated loss according to the
56661b7972Sstephen hemminger 	"Gilbert-Elliot" model, a 4-state markov model.
57661b7972Sstephen hemminger 
58661b7972Sstephen hemminger 	References:
59661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
60661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
61661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
62661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
63661b7972Sstephen hemminger 
64661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it
65661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
661da177e4SLinus Torvalds */
671da177e4SLinus Torvalds 
struct netem_sched_data {
	struct Qdisc	*qdisc;		/* inner qdisc holding delayed packets */
	struct qdisc_watchdog watchdog;	/* wakes dequeue when head pkt is due */

	psched_tdiff_t latency;		/* base added delay (mu for tabledist) */
	psched_tdiff_t jitter;		/* delay variation (sigma for tabledist) */

	u32 loss;			/* drop probability: 0 => none, ~0 => all */
	u32 limit;
	u32 counter;			/* packets sent since last reorder event */
	u32 gap;			/* reorder one out of every 'gap' packets */
	u32 duplicate;			/* duplication probability */
	u32 reorder;			/* reordering probability */
	u32 corrupt;			/* single-bit corruption probability */
	u32 rate;			/* see packet_len_2_sched_time() */

	/* correlated RNG state, one generator per randomized parameter */
	struct crndstate {
		u32 last;		/* previous output, blended into next */
		u32 rho;		/* correlation coeff, scaled fixed point */
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	/* optional user-loaded delay distribution (normal, Pareto, ...) */
	struct disttable {
		u32  size;
		s16 table[0];		/* 'size' entries follow inline */
	} *delay_dist;

	/* which generator loss_event() dispatches to */
	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};
1141da177e4SLinus Torvalds 
/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;	/* absolute scheduled departure time */
};
1191da177e4SLinus Torvalds 
/* netem's private cb area lives after the generic qdisc cb; the
 * BUILD_BUG_ON fails the build if skb->cb cannot hold both.
 */
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
1265f86173bSJussi Kivilinna 
1271da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1281da177e4SLinus Torvalds  * Use entropy source for initial seed.
1291da177e4SLinus Torvalds  */
1301da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1311da177e4SLinus Torvalds {
1321da177e4SLinus Torvalds 	state->rho = rho;
1331da177e4SLinus Torvalds 	state->last = net_random();
1341da177e4SLinus Torvalds }
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1371da177e4SLinus Torvalds  * Next number depends on last value.
1381da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1391da177e4SLinus Torvalds  */
140b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1411da177e4SLinus Torvalds {
1421da177e4SLinus Torvalds 	u64 value, rho;
1431da177e4SLinus Torvalds 	unsigned long answer;
1441da177e4SLinus Torvalds 
145bb2f8cc0SStephen Hemminger 	if (state->rho == 0)	/* no correlation */
1461da177e4SLinus Torvalds 		return net_random();
1471da177e4SLinus Torvalds 
1481da177e4SLinus Torvalds 	value = net_random();
1491da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1501da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1511da177e4SLinus Torvalds 	state->last = answer;
1521da177e4SLinus Torvalds 	return answer;
1531da177e4SLinus Torvalds }
1541da177e4SLinus Torvalds 
155661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
156661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
157661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
158661b7972Sstephen hemminger  */
159661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
160661b7972Sstephen hemminger {
161661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
162661b7972Sstephen hemminger 	u32 rnd = net_random();
163661b7972Sstephen hemminger 
164661b7972Sstephen hemminger 	/*
16525985edcSLucas De Marchi 	 * Makes a comparison between rnd and the transition
166661b7972Sstephen hemminger 	 * probabilities outgoing from the current state, then decides the
167661b7972Sstephen hemminger 	 * next state and if the next packet has to be transmitted or lost.
168661b7972Sstephen hemminger 	 * The four states correspond to:
169661b7972Sstephen hemminger 	 *   1 => successfully transmitted packets within a gap period
170661b7972Sstephen hemminger 	 *   4 => isolated losses within a gap period
171661b7972Sstephen hemminger 	 *   3 => lost packets within a burst period
172661b7972Sstephen hemminger 	 *   2 => successfully transmitted packets within a burst period
173661b7972Sstephen hemminger 	 */
174661b7972Sstephen hemminger 	switch (clg->state) {
175661b7972Sstephen hemminger 	case 1:
176661b7972Sstephen hemminger 		if (rnd < clg->a4) {
177661b7972Sstephen hemminger 			clg->state = 4;
178661b7972Sstephen hemminger 			return true;
179661b7972Sstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1) {
180661b7972Sstephen hemminger 			clg->state = 3;
181661b7972Sstephen hemminger 			return true;
182661b7972Sstephen hemminger 		} else if (clg->a1 < rnd)
183661b7972Sstephen hemminger 			clg->state = 1;
184661b7972Sstephen hemminger 
185661b7972Sstephen hemminger 		break;
186661b7972Sstephen hemminger 	case 2:
187661b7972Sstephen hemminger 		if (rnd < clg->a5) {
188661b7972Sstephen hemminger 			clg->state = 3;
189661b7972Sstephen hemminger 			return true;
190661b7972Sstephen hemminger 		} else
191661b7972Sstephen hemminger 			clg->state = 2;
192661b7972Sstephen hemminger 
193661b7972Sstephen hemminger 		break;
194661b7972Sstephen hemminger 	case 3:
195661b7972Sstephen hemminger 		if (rnd < clg->a3)
196661b7972Sstephen hemminger 			clg->state = 2;
197661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
198661b7972Sstephen hemminger 			clg->state = 1;
199661b7972Sstephen hemminger 			return true;
200661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
201661b7972Sstephen hemminger 			clg->state = 3;
202661b7972Sstephen hemminger 			return true;
203661b7972Sstephen hemminger 		}
204661b7972Sstephen hemminger 		break;
205661b7972Sstephen hemminger 	case 4:
206661b7972Sstephen hemminger 		clg->state = 1;
207661b7972Sstephen hemminger 		break;
208661b7972Sstephen hemminger 	}
209661b7972Sstephen hemminger 
210661b7972Sstephen hemminger 	return false;
211661b7972Sstephen hemminger }
212661b7972Sstephen hemminger 
213661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
214661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
215661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
216661b7972Sstephen hemminger  *
21725985edcSLucas De Marchi  * Makes a comparison between random number and the transition
218661b7972Sstephen hemminger  * probabilities outgoing from the current state, then decides the
21925985edcSLucas De Marchi  * next state. A second random number is extracted and the comparison
220661b7972Sstephen hemminger  * with the loss probability of the current state decides if the next
221661b7972Sstephen hemminger  * packet will be transmitted or lost.
222661b7972Sstephen hemminger  */
223661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
224661b7972Sstephen hemminger {
225661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
226661b7972Sstephen hemminger 
227661b7972Sstephen hemminger 	switch (clg->state) {
228661b7972Sstephen hemminger 	case 1:
229661b7972Sstephen hemminger 		if (net_random() < clg->a1)
230661b7972Sstephen hemminger 			clg->state = 2;
231661b7972Sstephen hemminger 		if (net_random() < clg->a4)
232661b7972Sstephen hemminger 			return true;
233661b7972Sstephen hemminger 	case 2:
234661b7972Sstephen hemminger 		if (net_random() < clg->a2)
235661b7972Sstephen hemminger 			clg->state = 1;
236661b7972Sstephen hemminger 		if (clg->a3 > net_random())
237661b7972Sstephen hemminger 			return true;
238661b7972Sstephen hemminger 	}
239661b7972Sstephen hemminger 
240661b7972Sstephen hemminger 	return false;
241661b7972Sstephen hemminger }
242661b7972Sstephen hemminger 
243661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
244661b7972Sstephen hemminger {
245661b7972Sstephen hemminger 	switch (q->loss_model) {
246661b7972Sstephen hemminger 	case CLG_RANDOM:
247661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
248661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
249661b7972Sstephen hemminger 
250661b7972Sstephen hemminger 	case CLG_4_STATES:
251661b7972Sstephen hemminger 		/* 4state loss model algorithm (used also for GI model)
252661b7972Sstephen hemminger 		* Extracts a value from the markov 4 state loss generator,
253661b7972Sstephen hemminger 		* if it is 1 drops a packet and if needed writes the event in
254661b7972Sstephen hemminger 		* the kernel logs
255661b7972Sstephen hemminger 		*/
256661b7972Sstephen hemminger 		return loss_4state(q);
257661b7972Sstephen hemminger 
258661b7972Sstephen hemminger 	case CLG_GILB_ELL:
259661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm
260661b7972Sstephen hemminger 		* Extracts a value from the Gilbert-Elliot loss generator,
261661b7972Sstephen hemminger 		* if it is 1 drops a packet and if needed writes the event in
262661b7972Sstephen hemminger 		* the kernel logs
263661b7972Sstephen hemminger 		*/
264661b7972Sstephen hemminger 		return loss_gilb_ell(q);
265661b7972Sstephen hemminger 	}
266661b7972Sstephen hemminger 
267661b7972Sstephen hemminger 	return false;	/* not reached */
268661b7972Sstephen hemminger }
269661b7972Sstephen hemminger 
270661b7972Sstephen hemminger 
2711da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
2721da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
2731da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
2741da177e4SLinus Torvalds  */
275b407621cSStephen Hemminger static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
276b407621cSStephen Hemminger 				struct crndstate *state,
277b407621cSStephen Hemminger 				const struct disttable *dist)
2781da177e4SLinus Torvalds {
279b407621cSStephen Hemminger 	psched_tdiff_t x;
280b407621cSStephen Hemminger 	long t;
281b407621cSStephen Hemminger 	u32 rnd;
2821da177e4SLinus Torvalds 
2831da177e4SLinus Torvalds 	if (sigma == 0)
2841da177e4SLinus Torvalds 		return mu;
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds 	rnd = get_crandom(state);
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	/* default uniform distribution */
2891da177e4SLinus Torvalds 	if (dist == NULL)
2901da177e4SLinus Torvalds 		return (rnd % (2*sigma)) - sigma + mu;
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
2931da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
2941da177e4SLinus Torvalds 	if (x >= 0)
2951da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
2961da177e4SLinus Torvalds 	else
2971da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3001da177e4SLinus Torvalds }
3011da177e4SLinus Torvalds 
/* Convert a packet length into transmission time at the given rate.
 * ticks = len * NSEC_PER_SEC / rate, so rate is in bytes per second.
 * do_div() divides the 64-bit value in place (needed for 64/32
 * division on 32-bit architectures); the remainder is discarded.
 */
static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
{
	u64 ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, rate);
	return PSCHED_NS2TICKS(ticks);
}
3097bc0f28cSHagen Paul Pfeifer 
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;	/* copies to send: 2 = duplicate, 0 = drop */

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	/* a duplicated-then-lost packet still sends one copy (count==1) */
	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		/* re-entering netem via the root sees duplicate == 0 */
		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		/* flip one random bit in a random byte of the linear head */
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	/* only now is the cb area safe to fill (skb may have been copied) */
	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &q->qdisc->q;

			delay += packet_len_2_sched_time(skb->len, q->rate);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight,
				 * calculate this time bonus and substract
				 * from delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	/* congestion drops in the child count against us; other
	 * failures (e.g. bypass) are passed up without accounting
	 */
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}
4281da177e4SLinus Torvalds 
4291da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc *sch)
4301da177e4SLinus Torvalds {
4311da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
4326d037a26SPatrick McHardy 	unsigned int len = 0;
4331da177e4SLinus Torvalds 
4346d037a26SPatrick McHardy 	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
4351da177e4SLinus Torvalds 		sch->q.qlen--;
4361da177e4SLinus Torvalds 		sch->qstats.drops++;
4371da177e4SLinus Torvalds 	}
4381da177e4SLinus Torvalds 	return len;
4391da177e4SLinus Torvalds }
4401da177e4SLinus Torvalds 
/* Hand out the head packet once its scheduled time_to_send arrives;
 * otherwise arm the watchdog for that time and return NULL.
 */
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	/* watchdog already armed for a future departure; nothing due */
	if (qdisc_is_throttled(sch))
		return NULL;

	/* peek, don't dequeue: packet stays queued if it's not due yet */
	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* if more time remaining? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			sch->q.qlen--;
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		/* head packet not due yet: sleep until its release time */
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}
4801da177e4SLinus Torvalds 
/* Flush every queued packet and cancel the pending watchdog timer. */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);	/* empty the inner qdisc */
	sch->q.qlen = 0;	/* our qlen mirrors the inner queue */
	qdisc_watchdog_cancel(&q->watchdog);
}
4891da177e4SLinus Torvalds 
/* dist_free - release a distribution table
 * The table may have come from kmalloc or the vmalloc fallback in
 * get_dist_table(), so pick the matching deallocator.  NULL is a no-op.
 */
static void dist_free(struct disttable *d)
{
	if (!d)
		return;

	if (is_vmalloc_addr(d))
		vfree(d);
	else
		kfree(d);
}
4996373a9a2Sstephen hemminger 
/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	/* bound table size before computing the allocation */
	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	/* try contiguous first, fall back to vmalloc for big tables;
	 * dist_free() handles both cases
	 */
	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	/* swap in the new table under the qdisc tree lock so the
	 * datapath (tabledist) never sees a half-built table
	 */
	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	dist_free(q->delay_dist);
	q->delay_dist = d;
	spin_unlock_bh(root_lock);
	return 0;
}
5361da177e4SLinus Torvalds 
537265eb67fSStephen Hemminger static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
5381da177e4SLinus Torvalds {
5391da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5401e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
5431da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
5441da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
5451da177e4SLinus Torvalds }
5461da177e4SLinus Torvalds 
547265eb67fSStephen Hemminger static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
5480dca51d3SStephen Hemminger {
5490dca51d3SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
5501e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
5510dca51d3SStephen Hemminger 
5520dca51d3SStephen Hemminger 	q->reorder = r->probability;
5530dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
5540dca51d3SStephen Hemminger }
5550dca51d3SStephen Hemminger 
556265eb67fSStephen Hemminger static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
557c865e5d9SStephen Hemminger {
558c865e5d9SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
5591e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
560c865e5d9SStephen Hemminger 
561c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
562c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
563c865e5d9SStephen Hemminger }
564c865e5d9SStephen Hemminger 
5657bc0f28cSHagen Paul Pfeifer static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
5667bc0f28cSHagen Paul Pfeifer {
5677bc0f28cSHagen Paul Pfeifer 	struct netem_sched_data *q = qdisc_priv(sch);
5687bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
5697bc0f28cSHagen Paul Pfeifer 
5707bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
5717bc0f28cSHagen Paul Pfeifer }
5727bc0f28cSHagen Paul Pfeifer 
573661b7972Sstephen hemminger static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
574661b7972Sstephen hemminger {
575661b7972Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
576661b7972Sstephen hemminger 	const struct nlattr *la;
577661b7972Sstephen hemminger 	int rem;
578661b7972Sstephen hemminger 
579661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
580661b7972Sstephen hemminger 		u16 type = nla_type(la);
581661b7972Sstephen hemminger 
582661b7972Sstephen hemminger 		switch(type) {
583661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
584661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
585661b7972Sstephen hemminger 
586661b7972Sstephen hemminger 			if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
587661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
588661b7972Sstephen hemminger 				return -EINVAL;
589661b7972Sstephen hemminger 			}
590661b7972Sstephen hemminger 
591661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
592661b7972Sstephen hemminger 
593661b7972Sstephen hemminger 			q->clg.state = 1;
594661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
595661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
596661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
597661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
598661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
599661b7972Sstephen hemminger 			break;
600661b7972Sstephen hemminger 		}
601661b7972Sstephen hemminger 
602661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
603661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
604661b7972Sstephen hemminger 
605661b7972Sstephen hemminger 			if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
606661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
607661b7972Sstephen hemminger 				return -EINVAL;
608661b7972Sstephen hemminger 			}
609661b7972Sstephen hemminger 
610661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
611661b7972Sstephen hemminger 			q->clg.state = 1;
612661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
613661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
614661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
615661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
616661b7972Sstephen hemminger 			break;
617661b7972Sstephen hemminger 		}
618661b7972Sstephen hemminger 
619661b7972Sstephen hemminger 		default:
620661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
621661b7972Sstephen hemminger 			return -EINVAL;
622661b7972Sstephen hemminger 		}
623661b7972Sstephen hemminger 	}
624661b7972Sstephen hemminger 
625661b7972Sstephen hemminger 	return 0;
626661b7972Sstephen hemminger }
627661b7972Sstephen hemminger 
/* Netlink validation policy: the simple options are fixed-size binary
 * structs; the loss models arrive as a nested attribute parsed by
 * get_loss_clg().
 */
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};
63527a3421eSPatrick McHardy 
6362c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
6372c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
6382c10b32bSThomas Graf {
6392c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
6402c10b32bSThomas Graf 
641661b7972Sstephen hemminger 	if (nested_len < 0) {
642661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
6432c10b32bSThomas Graf 		return -EINVAL;
644661b7972Sstephen hemminger 	}
645661b7972Sstephen hemminger 
6462c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
6472c10b32bSThomas Graf 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
6482c10b32bSThomas Graf 				 nested_len, policy);
649661b7972Sstephen hemminger 
6502c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
6512c10b32bSThomas Graf 	return 0;
6522c10b32bSThomas Graf }
6532c10b32bSThomas Graf 
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	/* TCA_OPTIONS carries a fixed tc_netem_qopt header followed by
	 * optional netlink attributes; parse_attr() splits the two.
	 */
	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* resize the inner tfifo to the requested queue limit */
	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_info("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	/* NOTE(review): an error from get_dist_table() or get_loss_clg()
	 * returns with the settings applied above still in effect —
	 * confirm partial application on failure is acceptable here.
	 */
	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	/* default to the legacy random loss model unless an explicit
	 * loss model attribute overrides it below
	 */
	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}
7141da177e4SLinus Torvalds 
715300ce174SStephen Hemminger /*
716300ce174SStephen Hemminger  * Special case version of FIFO queue for use by netem.
717300ce174SStephen Hemminger  * It queues in order based on timestamps in skb's
718300ce174SStephen Hemminger  */
struct fifo_sched_data {
	u32 limit;		/* maximum number of queued skbs */
	psched_time_t oldest;	/* time_to_send of the skb most recently
				 * enqueued at the tail; fast-path threshold
				 * in tfifo_enqueue() */
};
723300ce174SStephen Hemminger 
/* Enqueue @nskb keeping the queue sorted by netem's per-skb
 * time_to_send.  Most packets are due later than everything already
 * queued, so adding at the tail is the fast path; otherwise walk the
 * list backwards to find the insertion point.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			/* presumably qdisc_enqueue_tail() updates the
			 * backlog/qlen stats itself — verify */
			return qdisc_enqueue_tail(nskb, sch);
		}

		/* find the last skb due no later than nskb */
		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		/* raw list insert bypasses the helpers, so account the
		 * backlog by hand */
		sch->qstats.backlog += qdisc_pkt_len(nskb);

		return NET_XMIT_SUCCESS;
	}

	/* queue full: hand the skb to the overlimit/drop path */
	return qdisc_reshape_fail(nskb, sch);
}
754300ce174SStephen Hemminger 
7551e90474cSPatrick McHardy static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
756300ce174SStephen Hemminger {
757300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
758300ce174SStephen Hemminger 
759300ce174SStephen Hemminger 	if (opt) {
7601e90474cSPatrick McHardy 		struct tc_fifo_qopt *ctl = nla_data(opt);
7611e90474cSPatrick McHardy 		if (nla_len(opt) < sizeof(*ctl))
762300ce174SStephen Hemminger 			return -EINVAL;
763300ce174SStephen Hemminger 
764300ce174SStephen Hemminger 		q->limit = ctl->limit;
765300ce174SStephen Hemminger 	} else
7665ce2d488SDavid S. Miller 		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
767300ce174SStephen Hemminger 
768a084980dSPatrick McHardy 	q->oldest = PSCHED_PASTPERFECT;
769300ce174SStephen Hemminger 	return 0;
770300ce174SStephen Hemminger }
771300ce174SStephen Hemminger 
/* Dump the tfifo configuration (just the limit) into @skb.
 * Returns the skb length on success, -1 if the message ran out of room.
 */
static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	/* NLA_PUT jumps to nla_put_failure when the skb lacks tailroom */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}
783300ce174SStephen Hemminger 
/* tfifo only needs a custom enqueue; dequeue/peek/drop/reset reuse the
 * generic head-of-queue helpers.
 */
static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};
796300ce174SStephen Hemminger 
/* Create the netem qdisc: set up the delay watchdog, attach the inner
 * tfifo child (minor 1), then apply the caller's options.
 *
 * Returns 0 on success, -EINVAL without options, -ENOMEM if the child
 * qdisc cannot be created, or the error from netem_change().
 */
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_notice("netem: qdisc create tfifo qdisc failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_info("netem: change failed\n");
		/* undo the child creation above on failure */
		qdisc_destroy(q->qdisc);
	}
	return ret;
}
8221da177e4SLinus Torvalds 
/* Tear down netem: stop the pending watchdog timer first, then release
 * the inner qdisc and the optional delay distribution table.
 */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}
8311da177e4SLinus Torvalds 
/* Emit the configured loss model as a nested TCA_NETEM_LOSS attribute.
 * The legacy random model produces no nested data at all (the nest is
 * cancelled).  Returns 0 on success, -1 if the skb ran out of room.
 */
static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		/* reverse of the clg.a1..a5 mapping in get_loss_clg() */
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	/* roll back the partially-built nest */
	nla_nest_cancel(skb, nest);
	return -1;
}
879661b7972Sstephen hemminger 
/* Dump the full netem configuration: the fixed tc_netem_qopt header
 * followed by the correlation, reorder, corrupt, rate and loss-model
 * attributes — mirroring the layout netem_change() parses.
 * Returns the new message length, or -1 (after trimming) on overflow.
 */
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	/* remember the tail so the whole dump can be trimmed on failure */
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	/* each NLA_PUT below jumps to nla_put_failure on overflow */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}
9231da177e4SLinus Torvalds 
92410f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
92510f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
92610f6dfcfSstephen hemminger {
92710f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
92810f6dfcfSstephen hemminger 
92910f6dfcfSstephen hemminger 	if (cl != 1) 	/* only one class */
93010f6dfcfSstephen hemminger 		return -ENOENT;
93110f6dfcfSstephen hemminger 
93210f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
93310f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
93410f6dfcfSstephen hemminger 
93510f6dfcfSstephen hemminger 	return 0;
93610f6dfcfSstephen hemminger }
93710f6dfcfSstephen hemminger 
/* Replace the inner qdisc with @new (noop_qdisc if NULL), returning the
 * previous one through @old.  The swap, queue-length accounting and
 * reset of the old qdisc all happen under the qdisc tree lock.
 */
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	/* drop the old child's packets from the ancestors' qlen counts */
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}
95510f6dfcfSstephen hemminger 
95610f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
95710f6dfcfSstephen hemminger {
95810f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
95910f6dfcfSstephen hemminger 	return q->qdisc;
96010f6dfcfSstephen hemminger }
96110f6dfcfSstephen hemminger 
/* Class lookup: the single class always exists, so any classid maps to
 * the cookie 1 used throughout the class ops.
 */
static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}
96610f6dfcfSstephen hemminger 
/* Nothing to release: netem_get() hands out a plain cookie, not a
 * reference-counted object.
 */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}
97010f6dfcfSstephen hemminger 
97110f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
97210f6dfcfSstephen hemminger {
97310f6dfcfSstephen hemminger 	if (!walker->stop) {
97410f6dfcfSstephen hemminger 		if (walker->count >= walker->skip)
97510f6dfcfSstephen hemminger 			if (walker->fn(sch, 1, walker) < 0) {
97610f6dfcfSstephen hemminger 				walker->stop = 1;
97710f6dfcfSstephen hemminger 				return;
97810f6dfcfSstephen hemminger 			}
97910f6dfcfSstephen hemminger 		walker->count++;
98010f6dfcfSstephen hemminger 	}
98110f6dfcfSstephen hemminger }
98210f6dfcfSstephen hemminger 
/* Minimal class ops for netem's single fixed class. */
static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};
99110f6dfcfSstephen hemminger 
/* Top-level qdisc ops registered with the packet scheduler core. */
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
10071da177e4SLinus Torvalds 
10081da177e4SLinus Torvalds 
/* Module entry: announce the version and register the qdisc. */
static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
/* Module exit: unregister; the core refuses unload while in use. */
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
1021