xref: /openbmc/linux/net/sched/sch_netem.c (revision 265eb67f)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7798b6b19SStephen Hemminger  * 		2 of the License.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/module.h>
171da177e4SLinus Torvalds #include <linux/types.h>
181da177e4SLinus Torvalds #include <linux/kernel.h>
191da177e4SLinus Torvalds #include <linux/errno.h>
201da177e4SLinus Torvalds #include <linux/skbuff.h>
211da177e4SLinus Torvalds #include <linux/rtnetlink.h>
221da177e4SLinus Torvalds 
23dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
241da177e4SLinus Torvalds #include <net/pkt_sched.h>
251da177e4SLinus Torvalds 
26c865e5d9SStephen Hemminger #define VERSION "1.2"
27eb229c4cSStephen Hemminger 
281da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
291da177e4SLinus Torvalds 	====================================
301da177e4SLinus Torvalds 
311da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
321da177e4SLinus Torvalds 		 Network Emulation Tool
331da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
341da177e4SLinus Torvalds 
351da177e4SLinus Torvalds 	 ----------------------------------------------------------------
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
381da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
391da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
401da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
411da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
421da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
431da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds 	 This qdisc does not do classification that can be handled in
461da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
471da177e4SLinus Torvalds 	 control either since that can be handled by using token
481da177e4SLinus Torvalds 	 bucket or other rate control.
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds 	 The simulator is limited by the Linux timer resolution
511da177e4SLinus Torvalds 	 and will create packet bursts on the HZ boundary (1ms).
521da177e4SLinus Torvalds */
531da177e4SLinus Torvalds 
541da177e4SLinus Torvalds struct netem_sched_data {
551da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
5659cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
571da177e4SLinus Torvalds 
58b407621cSStephen Hemminger 	psched_tdiff_t latency;
59b407621cSStephen Hemminger 	psched_tdiff_t jitter;
60b407621cSStephen Hemminger 
611da177e4SLinus Torvalds 	u32 loss;
621da177e4SLinus Torvalds 	u32 limit;
631da177e4SLinus Torvalds 	u32 counter;
641da177e4SLinus Torvalds 	u32 gap;
651da177e4SLinus Torvalds 	u32 duplicate;
660dca51d3SStephen Hemminger 	u32 reorder;
67c865e5d9SStephen Hemminger 	u32 corrupt;
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds 	struct crndstate {
70b407621cSStephen Hemminger 		u32 last;
71b407621cSStephen Hemminger 		u32 rho;
72c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
731da177e4SLinus Torvalds 
741da177e4SLinus Torvalds 	struct disttable {
751da177e4SLinus Torvalds 		u32  size;
761da177e4SLinus Torvalds 		s16 table[0];
771da177e4SLinus Torvalds 	} *delay_dist;
781da177e4SLinus Torvalds };
791da177e4SLinus Torvalds 
/* Time stamp put into socket buffer control block.
 * Stored per-skb at enqueue time; dequeue compares it against the
 * current clock to decide whether the packet may leave yet.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};
841da177e4SLinus Torvalds 
/* Return netem's private cb area, which lives after the generic
 * qdisc cb inside skb->cb.  The BUILD_BUG_ON proves at compile time
 * that both structures fit within the fixed-size cb[] array.
 */
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
915f86173bSJussi Kivilinna 
/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 * rho is the (pre-scaled) correlation coefficient; see get_crandom().
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}
1001da177e4SLinus Torvalds 
/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	/* Fixed-point blend: answer = (1 - rho)*new + rho*last, where
	 * 2^32 represents 1.0; 64-bit products, then shift back down.
	 */
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;	/* remember output for the next call */
	return answer;
}
1191da177e4SLinus Torvalds 
/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 *
 * NOTE(review): assumes dist->size != 0 when dist is non-NULL — a
 * zero-sized table would divide by zero below; verify the table loader
 * rejects empty tables.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)		/* no jitter configured: constant delay */
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution: mu +/- sigma */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	/* Table entry t is a scaled deviate; the result is roughly
	 * sigma * t / NETEM_DIST_SCALE + mu.  sigma is split into
	 * quotient and remainder so the products stay in range, and
	 * half of NETEM_DIST_SCALE is added/subtracted to round the
	 * division toward the nearest value.
	 */
	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
1501da177e4SLinus Torvalds 
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;	/* copies to deliver after dup/loss dice: 0, 1 or 2 */

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	/* Drop won (or drop + duplicate cancelled to zero). */
	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.  q->duplicate is temporarily zeroed so the
	 * re-entrant enqueue of the clone cannot duplicate again.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
		    || (skb->ip_summed == CHECKSUM_PARTIAL
			&& skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		/* Flip one random bit in the linear data area.
		 * NOTE(review): assumes skb_headlen(skb) != 0 — a purely
		 * paged skb would divide by zero here; confirm callers.
		 */
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	/* Safe to fill the cb now; no further unshare can happen. */
	cb = netem_skb_cb(skb);
	if (q->gap == 0 		/* not doing reordering */
	    || q->counter < q->gap 	/* inside last reordering gap */
	    || q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();
		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.  time_to_send is "now" so it jumps ahead of
		 * everything already delayed; child stats adjusted by hand
		 * since we bypass its enqueue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	/* Account at this level: one queued packet, or one drop. */
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
	} else if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
	}

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}
2541da177e4SLinus Torvalds 
2551da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */
2561da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
2571da177e4SLinus Torvalds {
2581da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2591da177e4SLinus Torvalds 	int ret;
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds 	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
2621da177e4SLinus Torvalds 		sch->q.qlen++;
2631da177e4SLinus Torvalds 		sch->qstats.requeues++;
2641da177e4SLinus Torvalds 	}
2651da177e4SLinus Torvalds 
2661da177e4SLinus Torvalds 	return ret;
2671da177e4SLinus Torvalds }
2681da177e4SLinus Torvalds 
2691da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch)
2701da177e4SLinus Torvalds {
2711da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2726d037a26SPatrick McHardy 	unsigned int len = 0;
2731da177e4SLinus Torvalds 
2746d037a26SPatrick McHardy 	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
2751da177e4SLinus Torvalds 		sch->q.qlen--;
2761da177e4SLinus Torvalds 		sch->qstats.drops++;
2771da177e4SLinus Torvalds 	}
2781da177e4SLinus Torvalds 	return len;
2791da177e4SLinus Torvalds }
2801da177e4SLinus Torvalds 
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	/* If the watchdog has throttled us, nothing is due yet. */
	smp_mb();
	if (sch->flags & TCQ_F_THROTTLED)
		return NULL;

	/* Peek first: only commit to dequeueing once we know the head
	 * packet's release time has arrived.
	 */
	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* if more time remaining? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			pr_debug("netem_dequeue: return skb=%p\n", skb);
			sch->q.qlen--;
			return skb;
		}

		/* Head not due yet: arm the watchdog to fire then. */
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}
3111da177e4SLinus Torvalds 
/* Flush all queued packets and cancel any pending watchdog timer. */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds /*
3221da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
3231da177e4SLinus Torvalds  * signed 16 bit values.
3241da177e4SLinus Torvalds  */
3251e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
3261da177e4SLinus Torvalds {
3271da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3281e90474cSPatrick McHardy 	unsigned long n = nla_len(attr)/sizeof(__s16);
3291e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
3307698b4fcSDavid S. Miller 	spinlock_t *root_lock;
3311da177e4SLinus Torvalds 	struct disttable *d;
3321da177e4SLinus Torvalds 	int i;
3331da177e4SLinus Torvalds 
3341da177e4SLinus Torvalds 	if (n > 65536)
3351da177e4SLinus Torvalds 		return -EINVAL;
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
3381da177e4SLinus Torvalds 	if (!d)
3391da177e4SLinus Torvalds 		return -ENOMEM;
3401da177e4SLinus Torvalds 
3411da177e4SLinus Torvalds 	d->size = n;
3421da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
3431da177e4SLinus Torvalds 		d->table[i] = data[i];
3441da177e4SLinus Torvalds 
345102396aeSJarek Poplawski 	root_lock = qdisc_root_sleeping_lock(sch);
3467698b4fcSDavid S. Miller 
3477698b4fcSDavid S. Miller 	spin_lock_bh(root_lock);
3481da177e4SLinus Torvalds 	d = xchg(&q->delay_dist, d);
3497698b4fcSDavid S. Miller 	spin_unlock_bh(root_lock);
3501da177e4SLinus Torvalds 
3511da177e4SLinus Torvalds 	kfree(d);
3521da177e4SLinus Torvalds 	return 0;
3531da177e4SLinus Torvalds }
3541da177e4SLinus Torvalds 
/* Seed the delay/loss/duplication correlated RNGs from a
 * TCA_NETEM_CORR netlink attribute.
 */
static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}
3641da177e4SLinus Torvalds 
/* Apply reorder probability/correlation from a TCA_NETEM_REORDER
 * netlink attribute.
 */
static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}
3730dca51d3SStephen Hemminger 
/* Apply corruption probability/correlation from a TCA_NETEM_CORRUPT
 * netlink attribute.
 */
static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}
382c865e5d9SStephen Hemminger 
/* Netlink attribute validation policy: each nested netem attribute
 * must be exactly the size of its corresponding tc_netem_* struct.
 */
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
};
38827a3421eSPatrick McHardy 
/* Parse a netem options attribute: a fixed header of 'len' bytes
 * (struct tc_netem_qopt) optionally followed by nested attributes.
 * Fills tb[] from the nested portion, or zeroes it when the payload
 * contains only the header.  Returns 0 or a negative errno.
 */
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0)	/* payload too short for the header */
		return -EINVAL;
	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);
	/* No nested attributes present: report all as absent. */
	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}
4022c10b32bSThomas Graf 
/* Parse netlink message to set options.
 * Applies the fixed tc_netem_qopt header, then each optional nested
 * attribute (correlation, delay distribution, reorder, corrupt).
 * Returns 0 on success or a negative errno.
 */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* Propagate the packet limit to the inner tfifo first; fail
	 * before touching any of our own state if it is rejected.
	 */
	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;		/* restart the reorder gap counter */
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	return 0;
}
4561da177e4SLinus Torvalds 
/*
 * Special case version of FIFO queue for use by netem.
 * It queues in order based on timestamps in skb's
 */
struct fifo_sched_data {
	u32 limit;		/* max queue length (packets) */
	psched_time_t oldest;	/* largest time_to_send seen at the tail */
};
465300ce174SStephen Hemminger 
/* Insert nskb into the queue keeping it sorted by time_to_send.
 * Fast path: most packets go at the tail (monotonically increasing
 * release times); otherwise walk backwards to find the insert point.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		/* Walk from the tail toward the head until an skb due
		 * no later than nskb is found; insert right after it.
		 */
		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);
		sch->bstats.bytes += qdisc_pkt_len(nskb);
		sch->bstats.packets++;

		return NET_XMIT_SUCCESS;
	}

	/* Queue full: let the generic reshape/drop path handle it. */
	return qdisc_reshape_fail(nskb, sch);
}
498300ce174SStephen Hemminger 
4991e90474cSPatrick McHardy static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
500300ce174SStephen Hemminger {
501300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
502300ce174SStephen Hemminger 
503300ce174SStephen Hemminger 	if (opt) {
5041e90474cSPatrick McHardy 		struct tc_fifo_qopt *ctl = nla_data(opt);
5051e90474cSPatrick McHardy 		if (nla_len(opt) < sizeof(*ctl))
506300ce174SStephen Hemminger 			return -EINVAL;
507300ce174SStephen Hemminger 
508300ce174SStephen Hemminger 		q->limit = ctl->limit;
509300ce174SStephen Hemminger 	} else
5105ce2d488SDavid S. Miller 		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
511300ce174SStephen Hemminger 
512a084980dSPatrick McHardy 	q->oldest = PSCHED_PASTPERFECT;
513300ce174SStephen Hemminger 	return 0;
514300ce174SStephen Hemminger }
515300ce174SStephen Hemminger 
/* Dump the tfifo configuration (just the limit) to a netlink skb.
 * Returns the skb length on success, -1 if the attribute won't fit.
 */
static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	/* NLA_PUT jumps to nla_put_failure when the skb runs out of room. */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}
527300ce174SStephen Hemminger 
/* Operations for the internal time-sorted fifo; everything except
 * enqueue/init/dump is handled by the generic qdisc helpers.
 */
static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.requeue	=	qdisc_requeue,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};
541300ce174SStephen Hemminger 
/* Create the netem qdisc: set up the watchdog, create the inner
 * tfifo child, then apply the user's options via netem_change().
 * Options are mandatory.  Returns 0 or a negative errno.
 */
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
				     &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		/* Undo the child creation if configuration failed. */
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}
5671da177e4SLinus Torvalds 
/* Tear down: cancel the watchdog, destroy the inner qdisc and free
 * the optional delay distribution table (kfree(NULL) is a no-op).
 */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}
5761da177e4SLinus Torvalds 
/* Dump the full netem configuration: the fixed qopt header as
 * TCA_OPTIONS, followed by the correlation, reorder and corrupt
 * attributes.  Returns the skb length, or -1 after trimming on
 * overflow (NLA_PUT jumps to nla_put_failure).
 */
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nla = (struct nlattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	/* Patch the outer attribute's length now that the nested
	 * attributes are in place.
	 */
	nla->nla_len = skb_tail_pointer(skb) - b;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
6161da177e4SLinus Torvalds 
/* Public qdisc operations registered under the "netem" id. */
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.requeue	=	netem_requeue,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
6321da177e4SLinus Torvalds 
6331da177e4SLinus Torvalds 
/* Module entry point: announce the version and register the qdisc. */
static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
/* Module exit point: unregister the qdisc. */
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
6431da177e4SLinus Torvalds module_init(netem_module_init)
6441da177e4SLinus Torvalds module_exit(netem_module_exit)
6451da177e4SLinus Torvalds MODULE_LICENSE("GPL");
646