xref: /openbmc/linux/net/sched/sch_netem.c (revision 300ce174)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
51da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
61da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
71da177e4SLinus Torvalds  * 		2 of the License, or (at your option) any later version.
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
101da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
131da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/config.h>
171da177e4SLinus Torvalds #include <linux/module.h>
181da177e4SLinus Torvalds #include <linux/bitops.h>
191da177e4SLinus Torvalds #include <linux/types.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/netdevice.h>
231da177e4SLinus Torvalds #include <linux/skbuff.h>
241da177e4SLinus Torvalds #include <linux/rtnetlink.h>
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds #include <net/pkt_sched.h>
271da177e4SLinus Torvalds 
281da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
291da177e4SLinus Torvalds 	====================================
301da177e4SLinus Torvalds 
	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
331da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
341da177e4SLinus Torvalds 
351da177e4SLinus Torvalds 	 ----------------------------------------------------------------
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
381da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
391da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
401da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
411da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
421da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
431da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds 	 This qdisc does not do classification that can be handled in
461da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
471da177e4SLinus Torvalds 	 control either since that can be handled by using token
481da177e4SLinus Torvalds 	 bucket or other rate control.
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds 	 The simulator is limited by the Linux timer resolution
511da177e4SLinus Torvalds 	 and will create packet bursts on the HZ boundary (1ms).
521da177e4SLinus Torvalds */
531da177e4SLinus Torvalds 
541da177e4SLinus Torvalds struct netem_sched_data {
551da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
561da177e4SLinus Torvalds 	struct timer_list timer;
571da177e4SLinus Torvalds 
581da177e4SLinus Torvalds 	u32 latency;
591da177e4SLinus Torvalds 	u32 loss;
601da177e4SLinus Torvalds 	u32 limit;
611da177e4SLinus Torvalds 	u32 counter;
621da177e4SLinus Torvalds 	u32 gap;
631da177e4SLinus Torvalds 	u32 jitter;
641da177e4SLinus Torvalds 	u32 duplicate;
650dca51d3SStephen Hemminger 	u32 reorder;
661da177e4SLinus Torvalds 
671da177e4SLinus Torvalds 	struct crndstate {
681da177e4SLinus Torvalds 		unsigned long last;
691da177e4SLinus Torvalds 		unsigned long rho;
700dca51d3SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor;
711da177e4SLinus Torvalds 
721da177e4SLinus Torvalds 	struct disttable {
731da177e4SLinus Torvalds 		u32  size;
741da177e4SLinus Torvalds 		s16 table[0];
751da177e4SLinus Torvalds 	} *delay_dist;
761da177e4SLinus Torvalds };
771da177e4SLinus Torvalds 
/*
 * Time stamp put into socket buffer control block.
 * netem_enqueue() fills it in; netem_dequeue() and tfifo_enqueue() read it
 * to decide when a packet may leave and where it belongs in the queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};
821da177e4SLinus Torvalds 
831da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
841da177e4SLinus Torvalds  * Use entropy source for initial seed.
851da177e4SLinus Torvalds  */
861da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
871da177e4SLinus Torvalds {
881da177e4SLinus Torvalds 	state->rho = rho;
891da177e4SLinus Torvalds 	state->last = net_random();
901da177e4SLinus Torvalds }
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds /* get_crandom - correlated random number generator
931da177e4SLinus Torvalds  * Next number depends on last value.
941da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
951da177e4SLinus Torvalds  */
961da177e4SLinus Torvalds static unsigned long get_crandom(struct crndstate *state)
971da177e4SLinus Torvalds {
981da177e4SLinus Torvalds 	u64 value, rho;
991da177e4SLinus Torvalds 	unsigned long answer;
1001da177e4SLinus Torvalds 
1011da177e4SLinus Torvalds 	if (state->rho == 0)	/* no correllation */
1021da177e4SLinus Torvalds 		return net_random();
1031da177e4SLinus Torvalds 
1041da177e4SLinus Torvalds 	value = net_random();
1051da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1061da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1071da177e4SLinus Torvalds 	state->last = answer;
1081da177e4SLinus Torvalds 	return answer;
1091da177e4SLinus Torvalds }
1101da177e4SLinus Torvalds 
1111da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
1121da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
1131da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
1141da177e4SLinus Torvalds  */
1151da177e4SLinus Torvalds static long tabledist(unsigned long mu, long sigma,
1161da177e4SLinus Torvalds 		      struct crndstate *state, const struct disttable *dist)
1171da177e4SLinus Torvalds {
1181da177e4SLinus Torvalds 	long t, x;
1191da177e4SLinus Torvalds 	unsigned long rnd;
1201da177e4SLinus Torvalds 
1211da177e4SLinus Torvalds 	if (sigma == 0)
1221da177e4SLinus Torvalds 		return mu;
1231da177e4SLinus Torvalds 
1241da177e4SLinus Torvalds 	rnd = get_crandom(state);
1251da177e4SLinus Torvalds 
1261da177e4SLinus Torvalds 	/* default uniform distribution */
1271da177e4SLinus Torvalds 	if (dist == NULL)
1281da177e4SLinus Torvalds 		return (rnd % (2*sigma)) - sigma + mu;
1291da177e4SLinus Torvalds 
1301da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
1311da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
1321da177e4SLinus Torvalds 	if (x >= 0)
1331da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
1341da177e4SLinus Torvalds 	else
1351da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
1361da177e4SLinus Torvalds 
1371da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
1381da177e4SLinus Torvalds }
1391da177e4SLinus Torvalds 
1400afb51e7SStephen Hemminger /*
1410afb51e7SStephen Hemminger  * Insert one skb into qdisc.
1420afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
1430afb51e7SStephen Hemminger  * 	NET_XMIT_DROP: queue length didn't change.
1440afb51e7SStephen Hemminger  *      NET_XMIT_SUCCESS: one skb was queued.
1450afb51e7SStephen Hemminger  */
1461da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1471da177e4SLinus Torvalds {
1481da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
1490f9f32acSStephen Hemminger 	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
1500afb51e7SStephen Hemminger 	struct sk_buff *skb2;
1511da177e4SLinus Torvalds 	int ret;
1520afb51e7SStephen Hemminger 	int count = 1;
1531da177e4SLinus Torvalds 
154771018e7SStephen Hemminger 	pr_debug("netem_enqueue skb=%p\n", skb);
1551da177e4SLinus Torvalds 
1560afb51e7SStephen Hemminger 	/* Random duplication */
1570afb51e7SStephen Hemminger 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
1580afb51e7SStephen Hemminger 		++count;
1590afb51e7SStephen Hemminger 
1601da177e4SLinus Torvalds 	/* Random packet drop 0 => none, ~0 => all */
1610afb51e7SStephen Hemminger 	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
1620afb51e7SStephen Hemminger 		--count;
1630afb51e7SStephen Hemminger 
1640afb51e7SStephen Hemminger 	if (count == 0) {
1651da177e4SLinus Torvalds 		sch->qstats.drops++;
1661da177e4SLinus Torvalds 		kfree_skb(skb);
1670afb51e7SStephen Hemminger 		return NET_XMIT_DROP;
1681da177e4SLinus Torvalds 	}
1691da177e4SLinus Torvalds 
1700afb51e7SStephen Hemminger 	/*
1710afb51e7SStephen Hemminger 	 * If we need to duplicate packet, then re-insert at top of the
1720afb51e7SStephen Hemminger 	 * qdisc tree, since parent queuer expects that only one
1730afb51e7SStephen Hemminger 	 * skb will be queued.
174d5d75cd6SStephen Hemminger 	 */
1750afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
1760afb51e7SStephen Hemminger 		struct Qdisc *rootq = sch->dev->qdisc;
1770afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
1780afb51e7SStephen Hemminger 		q->duplicate = 0;
179d5d75cd6SStephen Hemminger 
1800afb51e7SStephen Hemminger 		rootq->enqueue(skb2, rootq);
1810afb51e7SStephen Hemminger 		q->duplicate = dupsave;
1821da177e4SLinus Torvalds 	}
1831da177e4SLinus Torvalds 
1840dca51d3SStephen Hemminger 	if (q->gap == 0 		/* not doing reordering */
1850dca51d3SStephen Hemminger 	    || q->counter < q->gap 	/* inside last reordering gap */
1860dca51d3SStephen Hemminger 	    || q->reorder < get_crandom(&q->reorder_cor)) {
1870f9f32acSStephen Hemminger 		psched_time_t now;
18807aaa115SStephen Hemminger 		psched_tdiff_t delay;
18907aaa115SStephen Hemminger 
19007aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
19107aaa115SStephen Hemminger 				  &q->delay_cor, q->delay_dist);
19207aaa115SStephen Hemminger 
1930f9f32acSStephen Hemminger 		PSCHED_GET_TIME(now);
19407aaa115SStephen Hemminger 		PSCHED_TADD2(now, delay, cb->time_to_send);
1951da177e4SLinus Torvalds 		++q->counter;
1961da177e4SLinus Torvalds 		ret = q->qdisc->enqueue(skb, q->qdisc);
1971da177e4SLinus Torvalds 	} else {
1980dca51d3SStephen Hemminger 		/*
1990dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
2000dca51d3SStephen Hemminger 		 * of the queue.
2010dca51d3SStephen Hemminger 		 */
2020f9f32acSStephen Hemminger 		PSCHED_GET_TIME(cb->time_to_send);
2030dca51d3SStephen Hemminger 		q->counter = 0;
2040f9f32acSStephen Hemminger 		ret = q->qdisc->ops->requeue(skb, q->qdisc);
2051da177e4SLinus Torvalds 	}
2061da177e4SLinus Torvalds 
2071da177e4SLinus Torvalds 	if (likely(ret == NET_XMIT_SUCCESS)) {
2081da177e4SLinus Torvalds 		sch->q.qlen++;
2091da177e4SLinus Torvalds 		sch->bstats.bytes += skb->len;
2101da177e4SLinus Torvalds 		sch->bstats.packets++;
2111da177e4SLinus Torvalds 	} else
2121da177e4SLinus Torvalds 		sch->qstats.drops++;
2131da177e4SLinus Torvalds 
214d5d75cd6SStephen Hemminger 	pr_debug("netem: enqueue ret %d\n", ret);
2151da177e4SLinus Torvalds 	return ret;
2161da177e4SLinus Torvalds }
2171da177e4SLinus Torvalds 
2181da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */
2191da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
2201da177e4SLinus Torvalds {
2211da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2221da177e4SLinus Torvalds 	int ret;
2231da177e4SLinus Torvalds 
2241da177e4SLinus Torvalds 	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
2251da177e4SLinus Torvalds 		sch->q.qlen++;
2261da177e4SLinus Torvalds 		sch->qstats.requeues++;
2271da177e4SLinus Torvalds 	}
2281da177e4SLinus Torvalds 
2291da177e4SLinus Torvalds 	return ret;
2301da177e4SLinus Torvalds }
2311da177e4SLinus Torvalds 
2321da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch)
2331da177e4SLinus Torvalds {
2341da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2351da177e4SLinus Torvalds 	unsigned int len;
2361da177e4SLinus Torvalds 
2371da177e4SLinus Torvalds 	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
2381da177e4SLinus Torvalds 		sch->q.qlen--;
2391da177e4SLinus Torvalds 		sch->qstats.drops++;
2401da177e4SLinus Torvalds 	}
2411da177e4SLinus Torvalds 	return len;
2421da177e4SLinus Torvalds }
2431da177e4SLinus Torvalds 
2441da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
2451da177e4SLinus Torvalds {
2461da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
2471da177e4SLinus Torvalds 	struct sk_buff *skb;
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds 	skb = q->qdisc->dequeue(q->qdisc);
250771018e7SStephen Hemminger 	if (skb) {
2510f9f32acSStephen Hemminger 		const struct netem_skb_cb *cb
2520f9f32acSStephen Hemminger 			= (const struct netem_skb_cb *)skb->cb;
2530f9f32acSStephen Hemminger 		psched_time_t now;
2540f9f32acSStephen Hemminger 
2550f9f32acSStephen Hemminger 		/* if more time remaining? */
2560f9f32acSStephen Hemminger 		PSCHED_GET_TIME(now);
25707aaa115SStephen Hemminger 
25807aaa115SStephen Hemminger 		if (PSCHED_TLESS(cb->time_to_send, now)) {
259771018e7SStephen Hemminger 			pr_debug("netem_dequeue: return skb=%p\n", skb);
2601da177e4SLinus Torvalds 			sch->q.qlen--;
261771018e7SStephen Hemminger 			sch->flags &= ~TCQ_F_THROTTLED;
2620f9f32acSStephen Hemminger 			return skb;
26307aaa115SStephen Hemminger 		} else {
26407aaa115SStephen Hemminger 			psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
26507aaa115SStephen Hemminger 
26607aaa115SStephen Hemminger 			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
26707aaa115SStephen Hemminger 				sch->qstats.drops++;
26807aaa115SStephen Hemminger 
26907aaa115SStephen Hemminger 				/* After this qlen is confused */
27007aaa115SStephen Hemminger 				printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
27107aaa115SStephen Hemminger 				       q->qdisc->ops->id);
27207aaa115SStephen Hemminger 
27307aaa115SStephen Hemminger 				sch->q.qlen--;
274771018e7SStephen Hemminger 			}
275771018e7SStephen Hemminger 
27607aaa115SStephen Hemminger 			mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
2770f9f32acSStephen Hemminger 			sch->flags |= TCQ_F_THROTTLED;
27807aaa115SStephen Hemminger 		}
2790f9f32acSStephen Hemminger 	}
2800f9f32acSStephen Hemminger 
2810f9f32acSStephen Hemminger 	return NULL;
2821da177e4SLinus Torvalds }
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds static void netem_watchdog(unsigned long arg)
2851da177e4SLinus Torvalds {
2861da177e4SLinus Torvalds 	struct Qdisc *sch = (struct Qdisc *)arg;
2871da177e4SLinus Torvalds 
288771018e7SStephen Hemminger 	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
289771018e7SStephen Hemminger 	sch->flags &= ~TCQ_F_THROTTLED;
290771018e7SStephen Hemminger 	netif_schedule(sch->dev);
2911da177e4SLinus Torvalds }
2921da177e4SLinus Torvalds 
/*
 * Reset the emulator: reset the inner qdisc, zero our queue length,
 * clear the throttled flag and stop the watchdog timer.
 */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	sch->flags &= ~TCQ_F_THROTTLED;
	/* waits for a running netem_watchdog() to finish */
	del_timer_sync(&q->timer);
}
3021da177e4SLinus Torvalds 
303300ce174SStephen Hemminger /* Pass size change message down to embedded FIFO */
3041da177e4SLinus Torvalds static int set_fifo_limit(struct Qdisc *q, int limit)
3051da177e4SLinus Torvalds {
3061da177e4SLinus Torvalds         struct rtattr *rta;
3071da177e4SLinus Torvalds 	int ret = -ENOMEM;
3081da177e4SLinus Torvalds 
309300ce174SStephen Hemminger 	/* Hack to avoid sending change message to non-FIFO */
310300ce174SStephen Hemminger 	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
311300ce174SStephen Hemminger 		return 0;
312300ce174SStephen Hemminger 
3131da177e4SLinus Torvalds 	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
3141da177e4SLinus Torvalds 	if (rta) {
3151da177e4SLinus Torvalds 		rta->rta_type = RTM_NEWQDISC;
3161da177e4SLinus Torvalds 		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
3171da177e4SLinus Torvalds 		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
3181da177e4SLinus Torvalds 
3191da177e4SLinus Torvalds 		ret = q->ops->change(q, rta);
3201da177e4SLinus Torvalds 		kfree(rta);
3211da177e4SLinus Torvalds 	}
3221da177e4SLinus Torvalds 	return ret;
3231da177e4SLinus Torvalds }
3241da177e4SLinus Torvalds 
3251da177e4SLinus Torvalds /*
3261da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
3271da177e4SLinus Torvalds  * signed 16 bit values.
3281da177e4SLinus Torvalds  */
3291da177e4SLinus Torvalds static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
3301da177e4SLinus Torvalds {
3311da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3321da177e4SLinus Torvalds 	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
3331da177e4SLinus Torvalds 	const __s16 *data = RTA_DATA(attr);
3341da177e4SLinus Torvalds 	struct disttable *d;
3351da177e4SLinus Torvalds 	int i;
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	if (n > 65536)
3381da177e4SLinus Torvalds 		return -EINVAL;
3391da177e4SLinus Torvalds 
3401da177e4SLinus Torvalds 	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
3411da177e4SLinus Torvalds 	if (!d)
3421da177e4SLinus Torvalds 		return -ENOMEM;
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds 	d->size = n;
3451da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
3461da177e4SLinus Torvalds 		d->table[i] = data[i];
3471da177e4SLinus Torvalds 
3481da177e4SLinus Torvalds 	spin_lock_bh(&sch->dev->queue_lock);
3491da177e4SLinus Torvalds 	d = xchg(&q->delay_dist, d);
3501da177e4SLinus Torvalds 	spin_unlock_bh(&sch->dev->queue_lock);
3511da177e4SLinus Torvalds 
3521da177e4SLinus Torvalds 	kfree(d);
3531da177e4SLinus Torvalds 	return 0;
3541da177e4SLinus Torvalds }
3551da177e4SLinus Torvalds 
3561da177e4SLinus Torvalds static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
3571da177e4SLinus Torvalds {
3581da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3591da177e4SLinus Torvalds 	const struct tc_netem_corr *c = RTA_DATA(attr);
3601da177e4SLinus Torvalds 
3611da177e4SLinus Torvalds 	if (RTA_PAYLOAD(attr) != sizeof(*c))
3621da177e4SLinus Torvalds 		return -EINVAL;
3631da177e4SLinus Torvalds 
3641da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
3651da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
3661da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
3671da177e4SLinus Torvalds 	return 0;
3681da177e4SLinus Torvalds }
3691da177e4SLinus Torvalds 
3700dca51d3SStephen Hemminger static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
3710dca51d3SStephen Hemminger {
3720dca51d3SStephen Hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3730dca51d3SStephen Hemminger 	const struct tc_netem_reorder *r = RTA_DATA(attr);
3740dca51d3SStephen Hemminger 
3750dca51d3SStephen Hemminger 	if (RTA_PAYLOAD(attr) != sizeof(*r))
3760dca51d3SStephen Hemminger 		return -EINVAL;
3770dca51d3SStephen Hemminger 
3780dca51d3SStephen Hemminger 	q->reorder = r->probability;
3790dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
3800dca51d3SStephen Hemminger 	return 0;
3810dca51d3SStephen Hemminger }
3820dca51d3SStephen Hemminger 
3831da177e4SLinus Torvalds static int netem_change(struct Qdisc *sch, struct rtattr *opt)
3841da177e4SLinus Torvalds {
3851da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
3861da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
3871da177e4SLinus Torvalds 	int ret;
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
3901da177e4SLinus Torvalds 		return -EINVAL;
3911da177e4SLinus Torvalds 
3921da177e4SLinus Torvalds 	qopt = RTA_DATA(opt);
3931da177e4SLinus Torvalds 	ret = set_fifo_limit(q->qdisc, qopt->limit);
3941da177e4SLinus Torvalds 	if (ret) {
3951da177e4SLinus Torvalds 		pr_debug("netem: can't set fifo limit\n");
3961da177e4SLinus Torvalds 		return ret;
3971da177e4SLinus Torvalds 	}
3981da177e4SLinus Torvalds 
3991da177e4SLinus Torvalds 	q->latency = qopt->latency;
4001da177e4SLinus Torvalds 	q->jitter = qopt->jitter;
4011da177e4SLinus Torvalds 	q->limit = qopt->limit;
4021da177e4SLinus Torvalds 	q->gap = qopt->gap;
4030dca51d3SStephen Hemminger 	q->counter = 0;
4041da177e4SLinus Torvalds 	q->loss = qopt->loss;
4051da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
4061da177e4SLinus Torvalds 
4070dca51d3SStephen Hemminger 	/* for compatiablity with earlier versions.
4080dca51d3SStephen Hemminger 	 * if gap is set, need to assume 100% probablity
4090dca51d3SStephen Hemminger 	 */
4100dca51d3SStephen Hemminger 	q->reorder = ~0;
4110dca51d3SStephen Hemminger 
4121da177e4SLinus Torvalds 	/* Handle nested options after initial queue options.
4131da177e4SLinus Torvalds 	 * Should have put all options in nested format but too late now.
4141da177e4SLinus Torvalds 	 */
4151da177e4SLinus Torvalds 	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
4161da177e4SLinus Torvalds 		struct rtattr *tb[TCA_NETEM_MAX];
4171da177e4SLinus Torvalds 		if (rtattr_parse(tb, TCA_NETEM_MAX,
4181da177e4SLinus Torvalds 				 RTA_DATA(opt) + sizeof(*qopt),
4191da177e4SLinus Torvalds 				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
4201da177e4SLinus Torvalds 			return -EINVAL;
4211da177e4SLinus Torvalds 
4221da177e4SLinus Torvalds 		if (tb[TCA_NETEM_CORR-1]) {
4231da177e4SLinus Torvalds 			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
4241da177e4SLinus Torvalds 			if (ret)
4251da177e4SLinus Torvalds 				return ret;
4261da177e4SLinus Torvalds 		}
4271da177e4SLinus Torvalds 
4281da177e4SLinus Torvalds 		if (tb[TCA_NETEM_DELAY_DIST-1]) {
4291da177e4SLinus Torvalds 			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
4301da177e4SLinus Torvalds 			if (ret)
4311da177e4SLinus Torvalds 				return ret;
4321da177e4SLinus Torvalds 		}
4330dca51d3SStephen Hemminger 		if (tb[TCA_NETEM_REORDER-1]) {
4340dca51d3SStephen Hemminger 			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
4350dca51d3SStephen Hemminger 			if (ret)
4360dca51d3SStephen Hemminger 				return ret;
4370dca51d3SStephen Hemminger 		}
4381da177e4SLinus Torvalds 	}
4391da177e4SLinus Torvalds 
4401da177e4SLinus Torvalds 
4411da177e4SLinus Torvalds 	return 0;
4421da177e4SLinus Torvalds }
4431da177e4SLinus Torvalds 
/*
 * Special case version of FIFO queue for use by netem.
 * It queues in order based on timestamps in skb's
 */
struct fifo_sched_data {
	u32 limit;	/* tfifo_enqueue() drops once the queue holds this many skbs */
};
451300ce174SStephen Hemminger 
452300ce174SStephen Hemminger static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
453300ce174SStephen Hemminger {
454300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
455300ce174SStephen Hemminger 	struct sk_buff_head *list = &sch->q;
456300ce174SStephen Hemminger 	const struct netem_skb_cb *ncb
457300ce174SStephen Hemminger 		= (const struct netem_skb_cb *)nskb->cb;
458300ce174SStephen Hemminger 	struct sk_buff *skb;
459300ce174SStephen Hemminger 
460300ce174SStephen Hemminger 	if (likely(skb_queue_len(list) < q->limit)) {
461300ce174SStephen Hemminger 		skb_queue_reverse_walk(list, skb) {
462300ce174SStephen Hemminger 			const struct netem_skb_cb *cb
463300ce174SStephen Hemminger 				= (const struct netem_skb_cb *)skb->cb;
464300ce174SStephen Hemminger 
465300ce174SStephen Hemminger 			if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send))
466300ce174SStephen Hemminger 				break;
467300ce174SStephen Hemminger 		}
468300ce174SStephen Hemminger 
469300ce174SStephen Hemminger 		__skb_queue_after(list, skb, nskb);
470300ce174SStephen Hemminger 
471300ce174SStephen Hemminger 		sch->qstats.backlog += nskb->len;
472300ce174SStephen Hemminger 		sch->bstats.bytes += nskb->len;
473300ce174SStephen Hemminger 		sch->bstats.packets++;
474300ce174SStephen Hemminger 
475300ce174SStephen Hemminger 		return NET_XMIT_SUCCESS;
476300ce174SStephen Hemminger 	}
477300ce174SStephen Hemminger 
478300ce174SStephen Hemminger 	return qdisc_drop(nskb, sch);
479300ce174SStephen Hemminger }
480300ce174SStephen Hemminger 
481300ce174SStephen Hemminger static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
482300ce174SStephen Hemminger {
483300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
484300ce174SStephen Hemminger 
485300ce174SStephen Hemminger 	if (opt) {
486300ce174SStephen Hemminger 		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
487300ce174SStephen Hemminger 		if (RTA_PAYLOAD(opt) < sizeof(*ctl))
488300ce174SStephen Hemminger 			return -EINVAL;
489300ce174SStephen Hemminger 
490300ce174SStephen Hemminger 		q->limit = ctl->limit;
491300ce174SStephen Hemminger 	} else
492300ce174SStephen Hemminger 		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
493300ce174SStephen Hemminger 
494300ce174SStephen Hemminger 	return 0;
495300ce174SStephen Hemminger }
496300ce174SStephen Hemminger 
497300ce174SStephen Hemminger static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
498300ce174SStephen Hemminger {
499300ce174SStephen Hemminger 	struct fifo_sched_data *q = qdisc_priv(sch);
500300ce174SStephen Hemminger 	struct tc_fifo_qopt opt = { .limit = q->limit };
501300ce174SStephen Hemminger 
502300ce174SStephen Hemminger 	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
503300ce174SStephen Hemminger 	return skb->len;
504300ce174SStephen Hemminger 
505300ce174SStephen Hemminger rtattr_failure:
506300ce174SStephen Hemminger 	return -1;
507300ce174SStephen Hemminger }
508300ce174SStephen Hemminger 
/*
 * Operations of the time-ordered FIFO that netem_init() creates as the
 * default inner qdisc.  Only enqueue, init/change and dump are specific
 * to it; the remaining operations use the generic qdisc queue helpers.
 */
static struct Qdisc_ops tfifo_qdisc_ops = {
	.id		=	"tfifo",	/* matches the "?fifo" test in set_fifo_limit() */
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.requeue	=	qdisc_requeue,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};
521300ce174SStephen Hemminger 
5221da177e4SLinus Torvalds static int netem_init(struct Qdisc *sch, struct rtattr *opt)
5231da177e4SLinus Torvalds {
5241da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5251da177e4SLinus Torvalds 	int ret;
5261da177e4SLinus Torvalds 
5271da177e4SLinus Torvalds 	if (!opt)
5281da177e4SLinus Torvalds 		return -EINVAL;
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds 	init_timer(&q->timer);
5311da177e4SLinus Torvalds 	q->timer.function = netem_watchdog;
5321da177e4SLinus Torvalds 	q->timer.data = (unsigned long) sch;
5331da177e4SLinus Torvalds 
534300ce174SStephen Hemminger 	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
5351da177e4SLinus Torvalds 	if (!q->qdisc) {
5361da177e4SLinus Torvalds 		pr_debug("netem: qdisc create failed\n");
5371da177e4SLinus Torvalds 		return -ENOMEM;
5381da177e4SLinus Torvalds 	}
5391da177e4SLinus Torvalds 
5401da177e4SLinus Torvalds 	ret = netem_change(sch, opt);
5411da177e4SLinus Torvalds 	if (ret) {
5421da177e4SLinus Torvalds 		pr_debug("netem: change failed\n");
5431da177e4SLinus Torvalds 		qdisc_destroy(q->qdisc);
5441da177e4SLinus Torvalds 	}
5451da177e4SLinus Torvalds 	return ret;
5461da177e4SLinus Torvalds }
5471da177e4SLinus Torvalds 
/*
 * Tear down the qdisc: stop the watchdog timer, destroy the inner qdisc
 * and free the delay distribution table (kfree() accepts NULL if none
 * was ever loaded).
 */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}
5561da177e4SLinus Torvalds 
5571da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
5581da177e4SLinus Torvalds {
5591da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
5601da177e4SLinus Torvalds 	unsigned char	 *b = skb->tail;
5611da177e4SLinus Torvalds 	struct rtattr *rta = (struct rtattr *) b;
5621da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
5631da177e4SLinus Torvalds 	struct tc_netem_corr cor;
5640dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
5651da177e4SLinus Torvalds 
5661da177e4SLinus Torvalds 	qopt.latency = q->latency;
5671da177e4SLinus Torvalds 	qopt.jitter = q->jitter;
5681da177e4SLinus Torvalds 	qopt.limit = q->limit;
5691da177e4SLinus Torvalds 	qopt.loss = q->loss;
5701da177e4SLinus Torvalds 	qopt.gap = q->gap;
5711da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
5721da177e4SLinus Torvalds 	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
5731da177e4SLinus Torvalds 
5741da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
5751da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
5761da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
5771da177e4SLinus Torvalds 	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
5780dca51d3SStephen Hemminger 
5790dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
5800dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
5810dca51d3SStephen Hemminger 	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
5820dca51d3SStephen Hemminger 
5831da177e4SLinus Torvalds 	rta->rta_len = skb->tail - b;
5841da177e4SLinus Torvalds 
5851da177e4SLinus Torvalds 	return skb->len;
5861da177e4SLinus Torvalds 
5871da177e4SLinus Torvalds rtattr_failure:
5881da177e4SLinus Torvalds 	skb_trim(skb, b - skb->data);
5891da177e4SLinus Torvalds 	return -1;
5901da177e4SLinus Torvalds }
5911da177e4SLinus Torvalds 
5921da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
5931da177e4SLinus Torvalds 			  struct sk_buff *skb, struct tcmsg *tcm)
5941da177e4SLinus Torvalds {
5951da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
5961da177e4SLinus Torvalds 
5971da177e4SLinus Torvalds 	if (cl != 1) 	/* only one class */
5981da177e4SLinus Torvalds 		return -ENOENT;
5991da177e4SLinus Torvalds 
6001da177e4SLinus Torvalds 	tcm->tcm_handle |= TC_H_MIN(1);
6011da177e4SLinus Torvalds 	tcm->tcm_info = q->qdisc->handle;
6021da177e4SLinus Torvalds 
6031da177e4SLinus Torvalds 	return 0;
6041da177e4SLinus Torvalds }
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
6071da177e4SLinus Torvalds 		     struct Qdisc **old)
6081da177e4SLinus Torvalds {
6091da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6101da177e4SLinus Torvalds 
6111da177e4SLinus Torvalds 	if (new == NULL)
6121da177e4SLinus Torvalds 		new = &noop_qdisc;
6131da177e4SLinus Torvalds 
6141da177e4SLinus Torvalds 	sch_tree_lock(sch);
6151da177e4SLinus Torvalds 	*old = xchg(&q->qdisc, new);
6161da177e4SLinus Torvalds 	qdisc_reset(*old);
6171da177e4SLinus Torvalds 	sch->q.qlen = 0;
6181da177e4SLinus Torvalds 	sch_tree_unlock(sch);
6191da177e4SLinus Torvalds 
6201da177e4SLinus Torvalds 	return 0;
6211da177e4SLinus Torvalds }
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
6241da177e4SLinus Torvalds {
6251da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6261da177e4SLinus Torvalds 	return q->qdisc;
6271da177e4SLinus Torvalds }
6281da177e4SLinus Torvalds 
6291da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid)
6301da177e4SLinus Torvalds {
6311da177e4SLinus Torvalds 	return 1;
6321da177e4SLinus Torvalds }
6331da177e4SLinus Torvalds 
/*
 * Counterpart of netem_get(); intentionally a no-op, since netem_get()
 * only hands out a constant handle.
 */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
	/* nothing to do */
}
6371da177e4SLinus Torvalds 
6381da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
6391da177e4SLinus Torvalds 			    struct rtattr **tca, unsigned long *arg)
6401da177e4SLinus Torvalds {
6411da177e4SLinus Torvalds 	return -ENOSYS;
6421da177e4SLinus Torvalds }
6431da177e4SLinus Torvalds 
6441da177e4SLinus Torvalds static int netem_delete(struct Qdisc *sch, unsigned long arg)
6451da177e4SLinus Torvalds {
6461da177e4SLinus Torvalds 	return -ENOSYS;
6471da177e4SLinus Torvalds }
6481da177e4SLinus Torvalds 
6491da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
6501da177e4SLinus Torvalds {
6511da177e4SLinus Torvalds 	if (!walker->stop) {
6521da177e4SLinus Torvalds 		if (walker->count >= walker->skip)
6531da177e4SLinus Torvalds 			if (walker->fn(sch, 1, walker) < 0) {
6541da177e4SLinus Torvalds 				walker->stop = 1;
6551da177e4SLinus Torvalds 				return;
6561da177e4SLinus Torvalds 			}
6571da177e4SLinus Torvalds 		walker->count++;
6581da177e4SLinus Torvalds 	}
6591da177e4SLinus Torvalds }
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
6621da177e4SLinus Torvalds {
6631da177e4SLinus Torvalds 	return NULL;
6641da177e4SLinus Torvalds }
6651da177e4SLinus Torvalds 
6661da177e4SLinus Torvalds static struct Qdisc_class_ops netem_class_ops = {
6671da177e4SLinus Torvalds 	.graft		=	netem_graft,
6681da177e4SLinus Torvalds 	.leaf		=	netem_leaf,
6691da177e4SLinus Torvalds 	.get		=	netem_get,
6701da177e4SLinus Torvalds 	.put		=	netem_put,
6711da177e4SLinus Torvalds 	.change		=	netem_change_class,
6721da177e4SLinus Torvalds 	.delete		=	netem_delete,
6731da177e4SLinus Torvalds 	.walk		=	netem_walk,
6741da177e4SLinus Torvalds 	.tcf_chain	=	netem_find_tcf,
6751da177e4SLinus Torvalds 	.dump		=	netem_dump_class,
6761da177e4SLinus Torvalds };
6771da177e4SLinus Torvalds 
6781da177e4SLinus Torvalds static struct Qdisc_ops netem_qdisc_ops = {
6791da177e4SLinus Torvalds 	.id		=	"netem",
6801da177e4SLinus Torvalds 	.cl_ops		=	&netem_class_ops,
6811da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
6821da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
6831da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
6841da177e4SLinus Torvalds 	.requeue	=	netem_requeue,
6851da177e4SLinus Torvalds 	.drop		=	netem_drop,
6861da177e4SLinus Torvalds 	.init		=	netem_init,
6871da177e4SLinus Torvalds 	.reset		=	netem_reset,
6881da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
6891da177e4SLinus Torvalds 	.change		=	netem_change,
6901da177e4SLinus Torvalds 	.dump		=	netem_dump,
6911da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
6921da177e4SLinus Torvalds };
6931da177e4SLinus Torvalds 
6941da177e4SLinus Torvalds 
6951da177e4SLinus Torvalds static int __init netem_module_init(void)
6961da177e4SLinus Torvalds {
6971da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
6981da177e4SLinus Torvalds }
6991da177e4SLinus Torvalds static void __exit netem_module_exit(void)
7001da177e4SLinus Torvalds {
7011da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
7021da177e4SLinus Torvalds }
7031da177e4SLinus Torvalds module_init(netem_module_init)
7041da177e4SLinus Torvalds module_exit(netem_module_exit)
7051da177e4SLinus Torvalds MODULE_LICENSE("GPL");
706