xref: /openbmc/linux/net/sched/sch_netem.c (revision eb229c4c)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
71da177e4SLinus Torvalds  * 		2 of the License, or (at your option) any later version.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/config.h>
171da177e4SLinus Torvalds #include <linux/module.h>
181da177e4SLinus Torvalds #include <linux/bitops.h>
191da177e4SLinus Torvalds #include <linux/types.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/netdevice.h>
231da177e4SLinus Torvalds #include <linux/skbuff.h>
241da177e4SLinus Torvalds #include <linux/rtnetlink.h>
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds #include <net/pkt_sched.h>
271da177e4SLinus Torvalds 
28eb229c4cSStephen Hemminger #define VERSION "1.1"
29eb229c4cSStephen Hemminger 
/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either since that can be handled by using token
	 bucket or other rate control.

	 The simulator is limited by the Linux timer resolution
	 and will create packet bursts on the HZ boundary (1ms).
*/
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds struct netem_sched_data {
571da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
581da177e4SLinus Torvalds 	struct timer_list timer;
591da177e4SLinus Torvalds 
601da177e4SLinus Torvalds 	u32 latency;
611da177e4SLinus Torvalds 	u32 loss;
621da177e4SLinus Torvalds 	u32 limit;
631da177e4SLinus Torvalds 	u32 counter;
641da177e4SLinus Torvalds 	u32 gap;
651da177e4SLinus Torvalds 	u32 jitter;
661da177e4SLinus Torvalds 	u32 duplicate;
670dca51d3SStephen Hemminger 	u32 reorder;
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds 	struct crndstate {
701da177e4SLinus Torvalds 		unsigned long last;
711da177e4SLinus Torvalds 		unsigned long rho;
720dca51d3SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor;
731da177e4SLinus Torvalds 
741da177e4SLinus Torvalds 	struct disttable {
751da177e4SLinus Torvalds 		u32  size;
761da177e4SLinus Torvalds 		s16 table[0];
771da177e4SLinus Torvalds 	} *delay_dist;
781da177e4SLinus Torvalds };
791da177e4SLinus Torvalds 
801da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */
811da177e4SLinus Torvalds struct netem_skb_cb {
821da177e4SLinus Torvalds 	psched_time_t	time_to_send;
831da177e4SLinus Torvalds };
841da177e4SLinus Torvalds 
851da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
861da177e4SLinus Torvalds  * Use entropy source for initial seed.
871da177e4SLinus Torvalds  */
881da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
891da177e4SLinus Torvalds {
901da177e4SLinus Torvalds 	state->rho = rho;
911da177e4SLinus Torvalds 	state->last = net_random();
921da177e4SLinus Torvalds }
931da177e4SLinus Torvalds 
941da177e4SLinus Torvalds /* get_crandom - correlated random number generator
951da177e4SLinus Torvalds  * Next number depends on last value.
961da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
971da177e4SLinus Torvalds  */
981da177e4SLinus Torvalds static unsigned long get_crandom(struct crndstate *state)
991da177e4SLinus Torvalds {
1001da177e4SLinus Torvalds 	u64 value, rho;
1011da177e4SLinus Torvalds 	unsigned long answer;
1021da177e4SLinus Torvalds 
1031da177e4SLinus Torvalds 	if (state->rho == 0)	/* no correllation */
1041da177e4SLinus Torvalds 		return net_random();
1051da177e4SLinus Torvalds 
1061da177e4SLinus Torvalds 	value = net_random();
1071da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1081da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1091da177e4SLinus Torvalds 	state->last = answer;
1101da177e4SLinus Torvalds 	return answer;
1111da177e4SLinus Torvalds }
1121da177e4SLinus Torvalds 
1131da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
1141da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
1151da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
1161da177e4SLinus Torvalds  */
1171da177e4SLinus Torvalds static long tabledist(unsigned long mu, long sigma,
1181da177e4SLinus Torvalds 		      struct crndstate *state, const struct disttable *dist)
1191da177e4SLinus Torvalds {
1201da177e4SLinus Torvalds 	long t, x;
1211da177e4SLinus Torvalds 	unsigned long rnd;
1221da177e4SLinus Torvalds 
1231da177e4SLinus Torvalds 	if (sigma == 0)
1241da177e4SLinus Torvalds 		return mu;
1251da177e4SLinus Torvalds 
1261da177e4SLinus Torvalds 	rnd = get_crandom(state);
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds 	/* default uniform distribution */
1291da177e4SLinus Torvalds 	if (dist == NULL)
1301da177e4SLinus Torvalds 		return (rnd % (2*sigma)) - sigma + mu;
1311da177e4SLinus Torvalds 
1321da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
1331da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
1341da177e4SLinus Torvalds 	if (x >= 0)
1351da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
1361da177e4SLinus Torvalds 	else
1371da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
1381da177e4SLinus Torvalds 
1391da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
1401da177e4SLinus Torvalds }
1411da177e4SLinus Torvalds 
1420afb51e7SStephen Hemminger /*
1430afb51e7SStephen Hemminger  * Insert one skb into qdisc.
1440afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
1450afb51e7SStephen Hemminger  * 	NET_XMIT_DROP: queue length didn't change.
1460afb51e7SStephen Hemminger  *      NET_XMIT_SUCCESS: one skb was queued.
1470afb51e7SStephen Hemminger  */
1481da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1491da177e4SLinus Torvalds {
1501da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
1510f9f32acSStephen Hemminger 	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
1520afb51e7SStephen Hemminger 	struct sk_buff *skb2;
1531da177e4SLinus Torvalds 	int ret;
1540afb51e7SStephen Hemminger 	int count = 1;
1551da177e4SLinus Torvalds 
156771018e7SStephen Hemminger 	pr_debug("netem_enqueue skb=%p\n", skb);
1571da177e4SLinus Torvalds 
1580afb51e7SStephen Hemminger 	/* Random duplication */
1590afb51e7SStephen Hemminger 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
1600afb51e7SStephen Hemminger 		++count;
1610afb51e7SStephen Hemminger 
1621da177e4SLinus Torvalds 	/* Random packet drop 0 => none, ~0 => all */
1630afb51e7SStephen Hemminger 	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
1640afb51e7SStephen Hemminger 		--count;
1650afb51e7SStephen Hemminger 
1660afb51e7SStephen Hemminger 	if (count == 0) {
1671da177e4SLinus Torvalds 		sch->qstats.drops++;
1681da177e4SLinus Torvalds 		kfree_skb(skb);
1690afb51e7SStephen Hemminger 		return NET_XMIT_DROP;
1701da177e4SLinus Torvalds 	}
1711da177e4SLinus Torvalds 
1720afb51e7SStephen Hemminger 	/*
1730afb51e7SStephen Hemminger 	 * If we need to duplicate packet, then re-insert at top of the
1740afb51e7SStephen Hemminger 	 * qdisc tree, since parent queuer expects that only one
1750afb51e7SStephen Hemminger 	 * skb will be queued.
176d5d75cd6SStephen Hemminger 	 */
1770afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
1780afb51e7SStephen Hemminger 		struct Qdisc *rootq = sch->dev->qdisc;
1790afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
1800afb51e7SStephen Hemminger 		q->duplicate = 0;
181d5d75cd6SStephen Hemminger 
1820afb51e7SStephen Hemminger 		rootq->enqueue(skb2, rootq);
1830afb51e7SStephen Hemminger 		q->duplicate = dupsave;
1841da177e4SLinus Torvalds 	}
1851da177e4SLinus Torvalds 
1860dca51d3SStephen Hemminger 	if (q->gap == 0 		/* not doing reordering */
1870dca51d3SStephen Hemminger 	    || q->counter < q->gap 	/* inside last reordering gap */
1880dca51d3SStephen Hemminger 	    || q->reorder < get_crandom(&q->reorder_cor)) {
1890f9f32acSStephen Hemminger 		psched_time_t now;
19007aaa115SStephen Hemminger 		psched_tdiff_t delay;
19107aaa115SStephen Hemminger 
19207aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
19307aaa115SStephen Hemminger 				  &q->delay_cor, q->delay_dist);
19407aaa115SStephen Hemminger 
1950f9f32acSStephen Hemminger 		PSCHED_GET_TIME(now);
19607aaa115SStephen Hemminger 		PSCHED_TADD2(now, delay, cb->time_to_send);
1971da177e4SLinus Torvalds 		++q->counter;
1981da177e4SLinus Torvalds 		ret = q->qdisc->enqueue(skb, q->qdisc);
1991da177e4SLinus Torvalds 	} else {
2000dca51d3SStephen Hemminger 		/*
2010dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
2020dca51d3SStephen Hemminger 		 * of the queue.
2030dca51d3SStephen Hemminger 		 */
2040f9f32acSStephen Hemminger 		PSCHED_GET_TIME(cb->time_to_send);
2050dca51d3SStephen Hemminger 		q->counter = 0;
2060f9f32acSStephen Hemminger 		ret = q->qdisc->ops->requeue(skb, q->qdisc);
2071da177e4SLinus Torvalds 	}
2081da177e4SLinus Torvalds 
2091da177e4SLinus Torvalds 	if (likely(ret == NET_XMIT_SUCCESS)) {
2101da177e4SLinus Torvalds 		sch->q.qlen++;
2111da177e4SLinus Torvalds 		sch->bstats.bytes += skb->len;
2121da177e4SLinus Torvalds 		sch->bstats.packets++;
2131da177e4SLinus Torvalds 	} else
2141da177e4SLinus Torvalds 		sch->qstats.drops++;
2151da177e4SLinus Torvalds 
216d5d75cd6SStephen Hemminger 	pr_debug("netem: enqueue ret %d\n", ret);
2171da177e4SLinus Torvalds 	return ret;
2181da177e4SLinus Torvalds }
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */
2211da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
2221da177e4SLinus Torvalds {
2231da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2241da177e4SLinus Torvalds 	int ret;
2251da177e4SLinus Torvalds 
2261da177e4SLinus Torvalds 	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
2271da177e4SLinus Torvalds 		sch->q.qlen++;
2281da177e4SLinus Torvalds 		sch->qstats.requeues++;
2291da177e4SLinus Torvalds 	}
2301da177e4SLinus Torvalds 
2311da177e4SLinus Torvalds 	return ret;
2321da177e4SLinus Torvalds }
2331da177e4SLinus Torvalds 
2341da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch)
2351da177e4SLinus Torvalds {
2361da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2371da177e4SLinus Torvalds 	unsigned int len;
2381da177e4SLinus Torvalds 
2391da177e4SLinus Torvalds 	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
2401da177e4SLinus Torvalds 		sch->q.qlen--;
2411da177e4SLinus Torvalds 		sch->qstats.drops++;
2421da177e4SLinus Torvalds 	}
2431da177e4SLinus Torvalds 	return len;
2441da177e4SLinus Torvalds }
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
2471da177e4SLinus Torvalds {
2481da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2491da177e4SLinus Torvalds 	struct sk_buff *skb;
2501da177e4SLinus Torvalds 
2511da177e4SLinus Torvalds 	skb = q->qdisc->dequeue(q->qdisc);
252771018e7SStephen Hemminger 	if (skb) {
2530f9f32acSStephen Hemminger 		const struct netem_skb_cb *cb
2540f9f32acSStephen Hemminger 			= (const struct netem_skb_cb *)skb->cb;
2550f9f32acSStephen Hemminger 		psched_time_t now;
2560f9f32acSStephen Hemminger 
2570f9f32acSStephen Hemminger 		/* if more time remaining? */
2580f9f32acSStephen Hemminger 		PSCHED_GET_TIME(now);
25907aaa115SStephen Hemminger 
26007aaa115SStephen Hemminger 		if (PSCHED_TLESS(cb->time_to_send, now)) {
261771018e7SStephen Hemminger 			pr_debug("netem_dequeue: return skb=%p\n", skb);
2621da177e4SLinus Torvalds 			sch->q.qlen--;
263771018e7SStephen Hemminger 			sch->flags &= ~TCQ_F_THROTTLED;
2640f9f32acSStephen Hemminger 			return skb;
26507aaa115SStephen Hemminger 		} else {
26607aaa115SStephen Hemminger 			psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
26707aaa115SStephen Hemminger 
26807aaa115SStephen Hemminger 			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
26907aaa115SStephen Hemminger 				sch->qstats.drops++;
27007aaa115SStephen Hemminger 
27107aaa115SStephen Hemminger 				/* After this qlen is confused */
27207aaa115SStephen Hemminger 				printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
27307aaa115SStephen Hemminger 				       q->qdisc->ops->id);
27407aaa115SStephen Hemminger 
27507aaa115SStephen Hemminger 				sch->q.qlen--;
276771018e7SStephen Hemminger 			}
277771018e7SStephen Hemminger 
27807aaa115SStephen Hemminger 			mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
2790f9f32acSStephen Hemminger 			sch->flags |= TCQ_F_THROTTLED;
28007aaa115SStephen Hemminger 		}
2810f9f32acSStephen Hemminger 	}
2820f9f32acSStephen Hemminger 
2830f9f32acSStephen Hemminger 	return NULL;
2841da177e4SLinus Torvalds }
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds static void netem_watchdog(unsigned long arg)
2871da177e4SLinus Torvalds {
2881da177e4SLinus Torvalds 	struct Qdisc *sch = (struct Qdisc *)arg;
2891da177e4SLinus Torvalds 
290771018e7SStephen Hemminger 	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
291771018e7SStephen Hemminger 	sch->flags &= ~TCQ_F_THROTTLED;
292771018e7SStephen Hemminger 	netif_schedule(sch->dev);
2931da177e4SLinus Torvalds }
2941da177e4SLinus Torvalds 
2951da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
2961da177e4SLinus Torvalds {
2971da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	qdisc_reset(q->qdisc);
3001da177e4SLinus Torvalds 	sch->q.qlen = 0;
301771018e7SStephen Hemminger 	sch->flags &= ~TCQ_F_THROTTLED;
3021da177e4SLinus Torvalds 	del_timer_sync(&q->timer);
3031da177e4SLinus Torvalds }
3041da177e4SLinus Torvalds 
305300ce174SStephen Hemminger /* Pass size change message down to embedded FIFO */
3061da177e4SLinus Torvalds static int set_fifo_limit(struct Qdisc *q, int limit)
3071da177e4SLinus Torvalds {
3081da177e4SLinus Torvalds         struct rtattr *rta;
3091da177e4SLinus Torvalds 	int ret = -ENOMEM;
3101da177e4SLinus Torvalds 
311300ce174SStephen Hemminger 	/* Hack to avoid sending change message to non-FIFO */
312300ce174SStephen Hemminger 	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
313300ce174SStephen Hemminger 		return 0;
314300ce174SStephen Hemminger 
3151da177e4SLinus Torvalds 	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
3161da177e4SLinus Torvalds 	if (rta) {
3171da177e4SLinus Torvalds 		rta->rta_type = RTM_NEWQDISC;
3181da177e4SLinus Torvalds 		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
3191da177e4SLinus Torvalds 		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds 		ret = q->ops->change(q, rta);
3221da177e4SLinus Torvalds 		kfree(rta);
3231da177e4SLinus Torvalds 	}
3241da177e4SLinus Torvalds 	return ret;
3251da177e4SLinus Torvalds }
3261da177e4SLinus Torvalds 
3271da177e4SLinus Torvalds /*
3281da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
3291da177e4SLinus Torvalds  * signed 16 bit values.
3301da177e4SLinus Torvalds  */
3311da177e4SLinus Torvalds static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
3321da177e4SLinus Torvalds {
3331da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3341da177e4SLinus Torvalds 	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
3351da177e4SLinus Torvalds 	const __s16 *data = RTA_DATA(attr);
3361da177e4SLinus Torvalds 	struct disttable *d;
3371da177e4SLinus Torvalds 	int i;
3381da177e4SLinus Torvalds 
3391da177e4SLinus Torvalds 	if (n > 65536)
3401da177e4SLinus Torvalds 		return -EINVAL;
3411da177e4SLinus Torvalds 
3421da177e4SLinus Torvalds 	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
3431da177e4SLinus Torvalds 	if (!d)
3441da177e4SLinus Torvalds 		return -ENOMEM;
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds 	d->size = n;
3471da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
3481da177e4SLinus Torvalds 		d->table[i] = data[i];
3491da177e4SLinus Torvalds 
3501da177e4SLinus Torvalds 	spin_lock_bh(&sch->dev->queue_lock);
3511da177e4SLinus Torvalds 	d = xchg(&q->delay_dist, d);
3521da177e4SLinus Torvalds 	spin_unlock_bh(&sch->dev->queue_lock);
3531da177e4SLinus Torvalds 
3541da177e4SLinus Torvalds 	kfree(d);
3551da177e4SLinus Torvalds 	return 0;
3561da177e4SLinus Torvalds }
3571da177e4SLinus Torvalds 
3581da177e4SLinus Torvalds static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
3591da177e4SLinus Torvalds {
3601da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3611da177e4SLinus Torvalds 	const struct tc_netem_corr *c = RTA_DATA(attr);
3621da177e4SLinus Torvalds 
3631da177e4SLinus Torvalds 	if (RTA_PAYLOAD(attr) != sizeof(*c))
3641da177e4SLinus Torvalds 		return -EINVAL;
3651da177e4SLinus Torvalds 
3661da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
3671da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
3681da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
3691da177e4SLinus Torvalds 	return 0;
3701da177e4SLinus Torvalds }
3711da177e4SLinus Torvalds 
3720dca51d3SStephen Hemminger static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
3730dca51d3SStephen Hemminger {
3740dca51d3SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3750dca51d3SStephen Hemminger 	const struct tc_netem_reorder *r = RTA_DATA(attr);
3760dca51d3SStephen Hemminger 
3770dca51d3SStephen Hemminger 	if (RTA_PAYLOAD(attr) != sizeof(*r))
3780dca51d3SStephen Hemminger 		return -EINVAL;
3790dca51d3SStephen Hemminger 
3800dca51d3SStephen Hemminger 	q->reorder = r->probability;
3810dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
3820dca51d3SStephen Hemminger 	return 0;
3830dca51d3SStephen Hemminger }
3840dca51d3SStephen Hemminger 
3851da177e4SLinus Torvalds static int netem_change(struct Qdisc *sch, struct rtattr *opt)
3861da177e4SLinus Torvalds {
3871da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3881da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
3891da177e4SLinus Torvalds 	int ret;
3901da177e4SLinus Torvalds 
3911da177e4SLinus Torvalds 	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
3921da177e4SLinus Torvalds 		return -EINVAL;
3931da177e4SLinus Torvalds 
3941da177e4SLinus Torvalds 	qopt = RTA_DATA(opt);
3951da177e4SLinus Torvalds 	ret = set_fifo_limit(q->qdisc, qopt->limit);
3961da177e4SLinus Torvalds 	if (ret) {
3971da177e4SLinus Torvalds 		pr_debug("netem: can't set fifo limit\n");
3981da177e4SLinus Torvalds 		return ret;
3991da177e4SLinus Torvalds 	}
4001da177e4SLinus Torvalds 
4011da177e4SLinus Torvalds 	q->latency = qopt->latency;
4021da177e4SLinus Torvalds 	q->jitter = qopt->jitter;
4031da177e4SLinus Torvalds 	q->limit = qopt->limit;
4041da177e4SLinus Torvalds 	q->gap = qopt->gap;
4050dca51d3SStephen Hemminger 	q->counter = 0;
4061da177e4SLinus Torvalds 	q->loss = qopt->loss;
4071da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
4081da177e4SLinus Torvalds 
4090dca51d3SStephen Hemminger 	/* for compatiablity with earlier versions.
4100dca51d3SStephen Hemminger 	 * if gap is set, need to assume 100% probablity
4110dca51d3SStephen Hemminger 	 */
4120dca51d3SStephen Hemminger 	q->reorder = ~0;
4130dca51d3SStephen Hemminger 
4141da177e4SLinus Torvalds 	/* Handle nested options after initial queue options.
4151da177e4SLinus Torvalds 	 * Should have put all options in nested format but too late now.
4161da177e4SLinus Torvalds 	 */
4171da177e4SLinus Torvalds 	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
4181da177e4SLinus Torvalds 		struct rtattr *tb[TCA_NETEM_MAX];
4191da177e4SLinus Torvalds 		if (rtattr_parse(tb, TCA_NETEM_MAX,
4201da177e4SLinus Torvalds 				 RTA_DATA(opt) + sizeof(*qopt),
4211da177e4SLinus Torvalds 				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
4221da177e4SLinus Torvalds 			return -EINVAL;
4231da177e4SLinus Torvalds 
4241da177e4SLinus Torvalds 		if (tb[TCA_NETEM_CORR-1]) {
4251da177e4SLinus Torvalds 			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
4261da177e4SLinus Torvalds 			if (ret)
4271da177e4SLinus Torvalds 				return ret;
4281da177e4SLinus Torvalds 		}
4291da177e4SLinus Torvalds 
4301da177e4SLinus Torvalds 		if (tb[TCA_NETEM_DELAY_DIST-1]) {
4311da177e4SLinus Torvalds 			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
4321da177e4SLinus Torvalds 			if (ret)
4331da177e4SLinus Torvalds 				return ret;
4341da177e4SLinus Torvalds 		}
4350dca51d3SStephen Hemminger 		if (tb[TCA_NETEM_REORDER-1]) {
4360dca51d3SStephen Hemminger 			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
4370dca51d3SStephen Hemminger 			if (ret)
4380dca51d3SStephen Hemminger 				return ret;
4390dca51d3SStephen Hemminger 		}
4401da177e4SLinus Torvalds 	}
4411da177e4SLinus Torvalds 
4421da177e4SLinus Torvalds 
4431da177e4SLinus Torvalds 	return 0;
4441da177e4SLinus Torvalds }
4451da177e4SLinus Torvalds 
446300ce174SStephen Hemminger /*
447300ce174SStephen Hemminger  * Special case version of FIFO queue for use by netem.
448300ce174SStephen Hemminger  * It queues in order based on timestamps in skb's
449300ce174SStephen Hemminger  */
450300ce174SStephen Hemminger struct fifo_sched_data {
451300ce174SStephen Hemminger 	u32 limit;
452300ce174SStephen Hemminger };
453300ce174SStephen Hemminger 
454300ce174SStephen Hemminger static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
455300ce174SStephen Hemminger {
456300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
457300ce174SStephen Hemminger 	struct sk_buff_head *list = &sch->q;
458300ce174SStephen Hemminger 	const struct netem_skb_cb *ncb
459300ce174SStephen Hemminger 		= (const struct netem_skb_cb *)nskb->cb;
460300ce174SStephen Hemminger 	struct sk_buff *skb;
461300ce174SStephen Hemminger 
462300ce174SStephen Hemminger 	if (likely(skb_queue_len(list) < q->limit)) {
463300ce174SStephen Hemminger 		skb_queue_reverse_walk(list, skb) {
464300ce174SStephen Hemminger 			const struct netem_skb_cb *cb
465300ce174SStephen Hemminger 				= (const struct netem_skb_cb *)skb->cb;
466300ce174SStephen Hemminger 
467300ce174SStephen Hemminger 			if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send))
468300ce174SStephen Hemminger 				break;
469300ce174SStephen Hemminger 		}
470300ce174SStephen Hemminger 
471300ce174SStephen Hemminger 		__skb_queue_after(list, skb, nskb);
472300ce174SStephen Hemminger 
473300ce174SStephen Hemminger 		sch->qstats.backlog += nskb->len;
474300ce174SStephen Hemminger 		sch->bstats.bytes += nskb->len;
475300ce174SStephen Hemminger 		sch->bstats.packets++;
476300ce174SStephen Hemminger 
477300ce174SStephen Hemminger 		return NET_XMIT_SUCCESS;
478300ce174SStephen Hemminger 	}
479300ce174SStephen Hemminger 
480300ce174SStephen Hemminger 	return qdisc_drop(nskb, sch);
481300ce174SStephen Hemminger }
482300ce174SStephen Hemminger 
483300ce174SStephen Hemminger static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
484300ce174SStephen Hemminger {
485300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
486300ce174SStephen Hemminger 
487300ce174SStephen Hemminger 	if (opt) {
488300ce174SStephen Hemminger 		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
489300ce174SStephen Hemminger 		if (RTA_PAYLOAD(opt) < sizeof(*ctl))
490300ce174SStephen Hemminger 			return -EINVAL;
491300ce174SStephen Hemminger 
492300ce174SStephen Hemminger 		q->limit = ctl->limit;
493300ce174SStephen Hemminger 	} else
494300ce174SStephen Hemminger 		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
495300ce174SStephen Hemminger 
496300ce174SStephen Hemminger 	return 0;
497300ce174SStephen Hemminger }
498300ce174SStephen Hemminger 
499300ce174SStephen Hemminger static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
500300ce174SStephen Hemminger {
501300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
502300ce174SStephen Hemminger 	struct tc_fifo_qopt opt = { .limit = q->limit };
503300ce174SStephen Hemminger 
504300ce174SStephen Hemminger 	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
505300ce174SStephen Hemminger 	return skb->len;
506300ce174SStephen Hemminger 
507300ce174SStephen Hemminger rtattr_failure:
508300ce174SStephen Hemminger 	return -1;
509300ce174SStephen Hemminger }
510300ce174SStephen Hemminger 
511300ce174SStephen Hemminger static struct Qdisc_ops tfifo_qdisc_ops = {
512300ce174SStephen Hemminger 	.id		=	"tfifo",
513300ce174SStephen Hemminger 	.priv_size	=	sizeof(struct fifo_sched_data),
514300ce174SStephen Hemminger 	.enqueue	=	tfifo_enqueue,
515300ce174SStephen Hemminger 	.dequeue	=	qdisc_dequeue_head,
516300ce174SStephen Hemminger 	.requeue	=	qdisc_requeue,
517300ce174SStephen Hemminger 	.drop		=	qdisc_queue_drop,
518300ce174SStephen Hemminger 	.init		=	tfifo_init,
519300ce174SStephen Hemminger 	.reset		=	qdisc_reset_queue,
520300ce174SStephen Hemminger 	.change		=	tfifo_init,
521300ce174SStephen Hemminger 	.dump		=	tfifo_dump,
522300ce174SStephen Hemminger };
523300ce174SStephen Hemminger 
5241da177e4SLinus Torvalds static int netem_init(struct Qdisc *sch, struct rtattr *opt)
5251da177e4SLinus Torvalds {
5261da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5271da177e4SLinus Torvalds 	int ret;
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds 	if (!opt)
5301da177e4SLinus Torvalds 		return -EINVAL;
5311da177e4SLinus Torvalds 
5321da177e4SLinus Torvalds 	init_timer(&q->timer);
5331da177e4SLinus Torvalds 	q->timer.function = netem_watchdog;
5341da177e4SLinus Torvalds 	q->timer.data = (unsigned long) sch;
5351da177e4SLinus Torvalds 
536300ce174SStephen Hemminger 	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
5371da177e4SLinus Torvalds 	if (!q->qdisc) {
5381da177e4SLinus Torvalds 		pr_debug("netem: qdisc create failed\n");
5391da177e4SLinus Torvalds 		return -ENOMEM;
5401da177e4SLinus Torvalds 	}
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds 	ret = netem_change(sch, opt);
5431da177e4SLinus Torvalds 	if (ret) {
5441da177e4SLinus Torvalds 		pr_debug("netem: change failed\n");
5451da177e4SLinus Torvalds 		qdisc_destroy(q->qdisc);
5461da177e4SLinus Torvalds 	}
5471da177e4SLinus Torvalds 	return ret;
5481da177e4SLinus Torvalds }
5491da177e4SLinus Torvalds 
5501da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
5511da177e4SLinus Torvalds {
5521da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5531da177e4SLinus Torvalds 
5541da177e4SLinus Torvalds 	del_timer_sync(&q->timer);
5551da177e4SLinus Torvalds 	qdisc_destroy(q->qdisc);
5561da177e4SLinus Torvalds 	kfree(q->delay_dist);
5571da177e4SLinus Torvalds }
5581da177e4SLinus Torvalds 
5591da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
5601da177e4SLinus Torvalds {
5611da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
5621da177e4SLinus Torvalds 	unsigned char	 *b = skb->tail;
5631da177e4SLinus Torvalds 	struct rtattr *rta = (struct rtattr *) b;
5641da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
5651da177e4SLinus Torvalds 	struct tc_netem_corr cor;
5660dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
5671da177e4SLinus Torvalds 
5681da177e4SLinus Torvalds 	qopt.latency = q->latency;
5691da177e4SLinus Torvalds 	qopt.jitter = q->jitter;
5701da177e4SLinus Torvalds 	qopt.limit = q->limit;
5711da177e4SLinus Torvalds 	qopt.loss = q->loss;
5721da177e4SLinus Torvalds 	qopt.gap = q->gap;
5731da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
5741da177e4SLinus Torvalds 	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
5751da177e4SLinus Torvalds 
5761da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
5771da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
5781da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
5791da177e4SLinus Torvalds 	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
5800dca51d3SStephen Hemminger 
5810dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
5820dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
5830dca51d3SStephen Hemminger 	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
5840dca51d3SStephen Hemminger 
5851da177e4SLinus Torvalds 	rta->rta_len = skb->tail - b;
5861da177e4SLinus Torvalds 
5871da177e4SLinus Torvalds 	return skb->len;
5881da177e4SLinus Torvalds 
5891da177e4SLinus Torvalds rtattr_failure:
5901da177e4SLinus Torvalds 	skb_trim(skb, b - skb->data);
5911da177e4SLinus Torvalds 	return -1;
5921da177e4SLinus Torvalds }
5931da177e4SLinus Torvalds 
5941da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
5951da177e4SLinus Torvalds 			  struct sk_buff *skb, struct tcmsg *tcm)
5961da177e4SLinus Torvalds {
5971da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5981da177e4SLinus Torvalds 
5991da177e4SLinus Torvalds 	if (cl != 1) 	/* only one class */
6001da177e4SLinus Torvalds 		return -ENOENT;
6011da177e4SLinus Torvalds 
6021da177e4SLinus Torvalds 	tcm->tcm_handle |= TC_H_MIN(1);
6031da177e4SLinus Torvalds 	tcm->tcm_info = q->qdisc->handle;
6041da177e4SLinus Torvalds 
6051da177e4SLinus Torvalds 	return 0;
6061da177e4SLinus Torvalds }
6071da177e4SLinus Torvalds 
6081da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
6091da177e4SLinus Torvalds 		     struct Qdisc **old)
6101da177e4SLinus Torvalds {
6111da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6121da177e4SLinus Torvalds 
6131da177e4SLinus Torvalds 	if (new == NULL)
6141da177e4SLinus Torvalds 		new = &noop_qdisc;
6151da177e4SLinus Torvalds 
6161da177e4SLinus Torvalds 	sch_tree_lock(sch);
6171da177e4SLinus Torvalds 	*old = xchg(&q->qdisc, new);
6181da177e4SLinus Torvalds 	qdisc_reset(*old);
6191da177e4SLinus Torvalds 	sch->q.qlen = 0;
6201da177e4SLinus Torvalds 	sch_tree_unlock(sch);
6211da177e4SLinus Torvalds 
6221da177e4SLinus Torvalds 	return 0;
6231da177e4SLinus Torvalds }
6241da177e4SLinus Torvalds 
6251da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
6261da177e4SLinus Torvalds {
6271da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6281da177e4SLinus Torvalds 	return q->qdisc;
6291da177e4SLinus Torvalds }
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid)
6321da177e4SLinus Torvalds {
6331da177e4SLinus Torvalds 	return 1;
6341da177e4SLinus Torvalds }
6351da177e4SLinus Torvalds 
/* Counterpart of netem_get(); intentionally does nothing. */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}
6391da177e4SLinus Torvalds 
6401da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
6411da177e4SLinus Torvalds 			    struct rtattr **tca, unsigned long *arg)
6421da177e4SLinus Torvalds {
6431da177e4SLinus Torvalds 	return -ENOSYS;
6441da177e4SLinus Torvalds }
6451da177e4SLinus Torvalds 
6461da177e4SLinus Torvalds static int netem_delete(struct Qdisc *sch, unsigned long arg)
6471da177e4SLinus Torvalds {
6481da177e4SLinus Torvalds 	return -ENOSYS;
6491da177e4SLinus Torvalds }
6501da177e4SLinus Torvalds 
6511da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
6521da177e4SLinus Torvalds {
6531da177e4SLinus Torvalds 	if (!walker->stop) {
6541da177e4SLinus Torvalds 		if (walker->count >= walker->skip)
6551da177e4SLinus Torvalds 			if (walker->fn(sch, 1, walker) < 0) {
6561da177e4SLinus Torvalds 				walker->stop = 1;
6571da177e4SLinus Torvalds 				return;
6581da177e4SLinus Torvalds 			}
6591da177e4SLinus Torvalds 		walker->count++;
6601da177e4SLinus Torvalds 	}
6611da177e4SLinus Torvalds }
6621da177e4SLinus Torvalds 
6631da177e4SLinus Torvalds static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
6641da177e4SLinus Torvalds {
6651da177e4SLinus Torvalds 	return NULL;
6661da177e4SLinus Torvalds }
6671da177e4SLinus Torvalds 
6681da177e4SLinus Torvalds static struct Qdisc_class_ops netem_class_ops = {
6691da177e4SLinus Torvalds 	.graft		=	netem_graft,
6701da177e4SLinus Torvalds 	.leaf		=	netem_leaf,
6711da177e4SLinus Torvalds 	.get		=	netem_get,
6721da177e4SLinus Torvalds 	.put		=	netem_put,
6731da177e4SLinus Torvalds 	.change		=	netem_change_class,
6741da177e4SLinus Torvalds 	.delete		=	netem_delete,
6751da177e4SLinus Torvalds 	.walk		=	netem_walk,
6761da177e4SLinus Torvalds 	.tcf_chain	=	netem_find_tcf,
6771da177e4SLinus Torvalds 	.dump		=	netem_dump_class,
6781da177e4SLinus Torvalds };
6791da177e4SLinus Torvalds 
6801da177e4SLinus Torvalds static struct Qdisc_ops netem_qdisc_ops = {
6811da177e4SLinus Torvalds 	.id		=	"netem",
6821da177e4SLinus Torvalds 	.cl_ops		=	&netem_class_ops,
6831da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
6841da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
6851da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
6861da177e4SLinus Torvalds 	.requeue	=	netem_requeue,
6871da177e4SLinus Torvalds 	.drop		=	netem_drop,
6881da177e4SLinus Torvalds 	.init		=	netem_init,
6891da177e4SLinus Torvalds 	.reset		=	netem_reset,
6901da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
6911da177e4SLinus Torvalds 	.change		=	netem_change,
6921da177e4SLinus Torvalds 	.dump		=	netem_dump,
6931da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
6941da177e4SLinus Torvalds };
6951da177e4SLinus Torvalds 
6961da177e4SLinus Torvalds 
6971da177e4SLinus Torvalds static int __init netem_module_init(void)
6981da177e4SLinus Torvalds {
699eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
7001da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
7011da177e4SLinus Torvalds }
7021da177e4SLinus Torvalds static void __exit netem_module_exit(void)
7031da177e4SLinus Torvalds {
7041da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
7051da177e4SLinus Torvalds }
7061da177e4SLinus Torvalds module_init(netem_module_init)
7071da177e4SLinus Torvalds module_exit(netem_module_exit)
7081da177e4SLinus Torvalds MODULE_LICENSE("GPL");
709