/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.2"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full-blown network emulator like NIST Net. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.
*/
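
/*	Example configuration (illustrative only; the exact option names and
	defaults depend on the iproute2 "tc" version in use):

	    tc qdisc add dev eth0 root netem delay 100ms 20ms 25% loss 1%

	would delay every packet by 100ms with +/- 20ms of jitter, 25%
	correlation between successive delay values, and drop 1% of packets.
*/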

struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

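/* netem's private cb area follows the generic struct qdisc_skb_cb inside
 * skb->cb (the BUILD_BUG_ON below checks that both still fit);
 * time_to_send is stamped in netem_enqueue() and read back by
 * tfifo_enqueue() and netem_dequeue().
 */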
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
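/* In effect this computes
 *
 *	next = (1 - c) * U + c * last,  where c = (rho + 1) / 2^32
 *
 * with U a fresh uniform 32-bit random value, using 64-bit fixed point
 * arithmetic: rho == 0 yields an uncorrelated stream and rho == ~0
 * simply repeats the previous value.
 */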
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
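/* With a table loaded the result is approximately
 *
 *	mu + (t * sigma) / NETEM_DIST_SCALE
 *
 * where t is a signed table entry chosen uniformly at random.  The product
 * is computed as (sigma / NETEM_DIST_SCALE) * t plus the rounded
 * (sigma % NETEM_DIST_SCALE) * t / NETEM_DIST_SCALE remainder, which keeps
 * the intermediate values small.
 */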
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
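/* A duplicated skb is re-injected at the root qdisc (see below), so even
 * when duplication happens the parent still sees exactly one successful
 * enqueue for the skb it handed to us.
 */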
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate the packet, then re-insert the copy at the
	 * top of the qdisc tree, since the parent queuer expects that only
	 * one skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do the checksum now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();
		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
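		/*
		 * Concretely: once "gap" packets have gone out through the
		 * delayed path above, each following packet is, with
		 * (correlated) probability "reorder", stamped with the
		 * current time and queued at the head, so it overtakes the
		 * packets still waiting out their delay; the counter then
		 * restarts.
		 */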
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
	} else if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
	}

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}

static unsigned int netem_drop(struct Qdisc* sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

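/* Packets sit in the child tfifo ordered by time_to_send.  Peek at the
 * head: if its send time has arrived, hand it up; otherwise arm the
 * qdisc watchdog for that time and return nothing for now.
 */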
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (sch->flags & TCQ_F_THROTTLED)
		return NULL;

	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* has the scheduled send time been reached? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif
			pr_debug("netem_dequeue: return skb=%p\n", skb);
			sch->q.qlen--;
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}

/*
 * Distribution data is a variable-size payload containing
 * signed 16 bit values.
 */
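/* The table is supplied from user space (tc typically loads one of its
 * pre-generated distribution files, e.g. normal or pareto) and is sampled
 * by tabledist(); it is limited to 64K entries and swapped in under the
 * root qdisc lock so it cannot change under a concurrent enqueue.
 */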
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;

	if (n > 65536)
		return -EINVAL;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	kfree(q->delay_dist);
	q->delay_dist = d;
	spin_unlock_bh(root_lock);
	return 0;
}

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
};

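/* netem's TCA_OPTIONS attribute is a struct tc_netem_qopt optionally
 * followed by nested attributes, so parsing has to start past the aligned
 * qopt header rather than using plain nested-attribute parsing.
 */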
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0)
		return -EINVAL;
	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);
	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* For compatibility with earlier versions:
	 * if gap is set, assume 100% reorder probability.
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	return 0;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues in order based on timestamps in the skbs.
 */
struct fifo_sched_data {
	u32 limit;
	psched_time_t oldest;
};

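/* The queue is kept sorted by time_to_send.  New skbs are inserted by
 * walking from the tail, since with a roughly constant delay most packets
 * belong at (or near) the end; a packet that sorts ahead of existing ones
 * is what produces reordering on the wire.
 */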
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);
		sch->bstats.bytes += qdisc_pkt_len(nskb);
		sch->bstats.packets++;

		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}

static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct fifo_sched_data *q = qdisc_priv(sch);

	if (opt) {
		struct tc_fifo_qopt *ctl = nla_data(opt);
		if (nla_len(opt) < sizeof(*ctl))
			return -EINVAL;

		q->limit = ctl->limit;
	} else
		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);

	q->oldest = PSCHED_PASTPERFECT;
	return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nla = (struct nlattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	nla->nla_len = skb_tail_pointer(skb) - b;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");