xref: /openbmc/linux/net/sched/sch_netem.c (revision 1da177e4)
1*1da177e4SLinus Torvalds /*
2*1da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
3*1da177e4SLinus Torvalds  *
4*1da177e4SLinus Torvalds  * 		This program is free software; you can redistribute it and/or
5*1da177e4SLinus Torvalds  * 		modify it under the terms of the GNU General Public License
6*1da177e4SLinus Torvalds  * 		as published by the Free Software Foundation; either version
7*1da177e4SLinus Torvalds  * 		2 of the License, or (at your option) any later version.
8*1da177e4SLinus Torvalds  *
9*1da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
10*1da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
11*1da177e4SLinus Torvalds  *
12*1da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
13*1da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
14*1da177e4SLinus Torvalds  */
15*1da177e4SLinus Torvalds 
16*1da177e4SLinus Torvalds #include <linux/config.h>
17*1da177e4SLinus Torvalds #include <linux/module.h>
18*1da177e4SLinus Torvalds #include <linux/bitops.h>
19*1da177e4SLinus Torvalds #include <linux/types.h>
20*1da177e4SLinus Torvalds #include <linux/kernel.h>
21*1da177e4SLinus Torvalds #include <linux/errno.h>
22*1da177e4SLinus Torvalds #include <linux/netdevice.h>
23*1da177e4SLinus Torvalds #include <linux/skbuff.h>
24*1da177e4SLinus Torvalds #include <linux/rtnetlink.h>
25*1da177e4SLinus Torvalds 
26*1da177e4SLinus Torvalds #include <net/pkt_sched.h>
27*1da177e4SLinus Torvalds 
28*1da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
29*1da177e4SLinus Torvalds 	====================================
30*1da177e4SLinus Torvalds 
	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
33*1da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
34*1da177e4SLinus Torvalds 
35*1da177e4SLinus Torvalds 	 ----------------------------------------------------------------
36*1da177e4SLinus Torvalds 
37*1da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
38*1da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
39*1da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
40*1da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
41*1da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
42*1da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
43*1da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
44*1da177e4SLinus Torvalds 
45*1da177e4SLinus Torvalds 	 This qdisc does not do classification that can be handled in
46*1da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
47*1da177e4SLinus Torvalds 	 control either since that can be handled by using token
48*1da177e4SLinus Torvalds 	 bucket or other rate control.
49*1da177e4SLinus Torvalds 
50*1da177e4SLinus Torvalds 	 The simulator is limited by the Linux timer resolution
51*1da177e4SLinus Torvalds 	 and will create packet bursts on the HZ boundary (1ms).
52*1da177e4SLinus Torvalds */
53*1da177e4SLinus Torvalds 
54*1da177e4SLinus Torvalds struct netem_sched_data {
55*1da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
56*1da177e4SLinus Torvalds 	struct sk_buff_head delayed;
57*1da177e4SLinus Torvalds 	struct timer_list timer;
58*1da177e4SLinus Torvalds 
59*1da177e4SLinus Torvalds 	u32 latency;
60*1da177e4SLinus Torvalds 	u32 loss;
61*1da177e4SLinus Torvalds 	u32 limit;
62*1da177e4SLinus Torvalds 	u32 counter;
63*1da177e4SLinus Torvalds 	u32 gap;
64*1da177e4SLinus Torvalds 	u32 jitter;
65*1da177e4SLinus Torvalds 	u32 duplicate;
66*1da177e4SLinus Torvalds 
67*1da177e4SLinus Torvalds 	struct crndstate {
68*1da177e4SLinus Torvalds 		unsigned long last;
69*1da177e4SLinus Torvalds 		unsigned long rho;
70*1da177e4SLinus Torvalds 	} delay_cor, loss_cor, dup_cor;
71*1da177e4SLinus Torvalds 
72*1da177e4SLinus Torvalds 	struct disttable {
73*1da177e4SLinus Torvalds 		u32  size;
74*1da177e4SLinus Torvalds 		s16 table[0];
75*1da177e4SLinus Torvalds 	} *delay_dist;
76*1da177e4SLinus Torvalds };
77*1da177e4SLinus Torvalds 
78*1da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */
79*1da177e4SLinus Torvalds struct netem_skb_cb {
80*1da177e4SLinus Torvalds 	psched_time_t	time_to_send;
81*1da177e4SLinus Torvalds };
82*1da177e4SLinus Torvalds 
83*1da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
84*1da177e4SLinus Torvalds  * Use entropy source for initial seed.
85*1da177e4SLinus Torvalds  */
86*1da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
87*1da177e4SLinus Torvalds {
88*1da177e4SLinus Torvalds 	state->rho = rho;
89*1da177e4SLinus Torvalds 	state->last = net_random();
90*1da177e4SLinus Torvalds }
91*1da177e4SLinus Torvalds 
92*1da177e4SLinus Torvalds /* get_crandom - correlated random number generator
93*1da177e4SLinus Torvalds  * Next number depends on last value.
94*1da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
95*1da177e4SLinus Torvalds  */
96*1da177e4SLinus Torvalds static unsigned long get_crandom(struct crndstate *state)
97*1da177e4SLinus Torvalds {
98*1da177e4SLinus Torvalds 	u64 value, rho;
99*1da177e4SLinus Torvalds 	unsigned long answer;
100*1da177e4SLinus Torvalds 
101*1da177e4SLinus Torvalds 	if (state->rho == 0)	/* no correllation */
102*1da177e4SLinus Torvalds 		return net_random();
103*1da177e4SLinus Torvalds 
104*1da177e4SLinus Torvalds 	value = net_random();
105*1da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
106*1da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
107*1da177e4SLinus Torvalds 	state->last = answer;
108*1da177e4SLinus Torvalds 	return answer;
109*1da177e4SLinus Torvalds }
110*1da177e4SLinus Torvalds 
111*1da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
112*1da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
113*1da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
114*1da177e4SLinus Torvalds  */
115*1da177e4SLinus Torvalds static long tabledist(unsigned long mu, long sigma,
116*1da177e4SLinus Torvalds 		      struct crndstate *state, const struct disttable *dist)
117*1da177e4SLinus Torvalds {
118*1da177e4SLinus Torvalds 	long t, x;
119*1da177e4SLinus Torvalds 	unsigned long rnd;
120*1da177e4SLinus Torvalds 
121*1da177e4SLinus Torvalds 	if (sigma == 0)
122*1da177e4SLinus Torvalds 		return mu;
123*1da177e4SLinus Torvalds 
124*1da177e4SLinus Torvalds 	rnd = get_crandom(state);
125*1da177e4SLinus Torvalds 
126*1da177e4SLinus Torvalds 	/* default uniform distribution */
127*1da177e4SLinus Torvalds 	if (dist == NULL)
128*1da177e4SLinus Torvalds 		return (rnd % (2*sigma)) - sigma + mu;
129*1da177e4SLinus Torvalds 
130*1da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
131*1da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
132*1da177e4SLinus Torvalds 	if (x >= 0)
133*1da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
134*1da177e4SLinus Torvalds 	else
135*1da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
136*1da177e4SLinus Torvalds 
137*1da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
138*1da177e4SLinus Torvalds }
139*1da177e4SLinus Torvalds 
140*1da177e4SLinus Torvalds /* Put skb in the private delayed queue. */
141*1da177e4SLinus Torvalds static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
142*1da177e4SLinus Torvalds {
143*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
144*1da177e4SLinus Torvalds 	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
145*1da177e4SLinus Torvalds 	psched_tdiff_t td;
146*1da177e4SLinus Torvalds 	psched_time_t now;
147*1da177e4SLinus Torvalds 
148*1da177e4SLinus Torvalds 	PSCHED_GET_TIME(now);
149*1da177e4SLinus Torvalds 	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
150*1da177e4SLinus Torvalds 	PSCHED_TADD2(now, td, cb->time_to_send);
151*1da177e4SLinus Torvalds 
152*1da177e4SLinus Torvalds 	/* Always queue at tail to keep packets in order */
153*1da177e4SLinus Torvalds 	if (likely(q->delayed.qlen < q->limit)) {
154*1da177e4SLinus Torvalds 		__skb_queue_tail(&q->delayed, skb);
155*1da177e4SLinus Torvalds 		if (!timer_pending(&q->timer)) {
156*1da177e4SLinus Torvalds 			q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
157*1da177e4SLinus Torvalds 			add_timer(&q->timer);
158*1da177e4SLinus Torvalds 		}
159*1da177e4SLinus Torvalds 		return NET_XMIT_SUCCESS;
160*1da177e4SLinus Torvalds 	}
161*1da177e4SLinus Torvalds 
162*1da177e4SLinus Torvalds 	kfree_skb(skb);
163*1da177e4SLinus Torvalds 	return NET_XMIT_DROP;
164*1da177e4SLinus Torvalds }
165*1da177e4SLinus Torvalds 
166*1da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
167*1da177e4SLinus Torvalds {
168*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
169*1da177e4SLinus Torvalds 	struct sk_buff *skb2;
170*1da177e4SLinus Torvalds 	int ret;
171*1da177e4SLinus Torvalds 
172*1da177e4SLinus Torvalds 	pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);
173*1da177e4SLinus Torvalds 
174*1da177e4SLinus Torvalds 	/* Random packet drop 0 => none, ~0 => all */
175*1da177e4SLinus Torvalds 	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
176*1da177e4SLinus Torvalds 		pr_debug("netem_enqueue: random loss\n");
177*1da177e4SLinus Torvalds 		sch->qstats.drops++;
178*1da177e4SLinus Torvalds 		kfree_skb(skb);
179*1da177e4SLinus Torvalds 		return 0;	/* lie about loss so TCP doesn't know */
180*1da177e4SLinus Torvalds 	}
181*1da177e4SLinus Torvalds 
182*1da177e4SLinus Torvalds 	/* Random duplication */
183*1da177e4SLinus Torvalds 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)
184*1da177e4SLinus Torvalds 	    && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
185*1da177e4SLinus Torvalds 		pr_debug("netem_enqueue: dup %p\n", skb2);
186*1da177e4SLinus Torvalds 
187*1da177e4SLinus Torvalds 		if (delay_skb(sch, skb2)) {
188*1da177e4SLinus Torvalds 			sch->q.qlen++;
189*1da177e4SLinus Torvalds 			sch->bstats.bytes += skb2->len;
190*1da177e4SLinus Torvalds 			sch->bstats.packets++;
191*1da177e4SLinus Torvalds 		} else
192*1da177e4SLinus Torvalds 			sch->qstats.drops++;
193*1da177e4SLinus Torvalds 	}
194*1da177e4SLinus Torvalds 
195*1da177e4SLinus Torvalds 	/* If doing simple delay then gap == 0 so all packets
196*1da177e4SLinus Torvalds 	 * go into the delayed holding queue
197*1da177e4SLinus Torvalds 	 * otherwise if doing out of order only "1 out of gap"
198*1da177e4SLinus Torvalds 	 * packets will be delayed.
199*1da177e4SLinus Torvalds 	 */
200*1da177e4SLinus Torvalds 	if (q->counter < q->gap) {
201*1da177e4SLinus Torvalds 		++q->counter;
202*1da177e4SLinus Torvalds 		ret = q->qdisc->enqueue(skb, q->qdisc);
203*1da177e4SLinus Torvalds 	} else {
204*1da177e4SLinus Torvalds 		q->counter = 0;
205*1da177e4SLinus Torvalds 		ret = delay_skb(sch, skb);
206*1da177e4SLinus Torvalds 	}
207*1da177e4SLinus Torvalds 
208*1da177e4SLinus Torvalds 	if (likely(ret == NET_XMIT_SUCCESS)) {
209*1da177e4SLinus Torvalds 		sch->q.qlen++;
210*1da177e4SLinus Torvalds 		sch->bstats.bytes += skb->len;
211*1da177e4SLinus Torvalds 		sch->bstats.packets++;
212*1da177e4SLinus Torvalds 	} else
213*1da177e4SLinus Torvalds 		sch->qstats.drops++;
214*1da177e4SLinus Torvalds 
215*1da177e4SLinus Torvalds 	return ret;
216*1da177e4SLinus Torvalds }
217*1da177e4SLinus Torvalds 
218*1da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */
219*1da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
220*1da177e4SLinus Torvalds {
221*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
222*1da177e4SLinus Torvalds 	int ret;
223*1da177e4SLinus Torvalds 
224*1da177e4SLinus Torvalds 	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
225*1da177e4SLinus Torvalds 		sch->q.qlen++;
226*1da177e4SLinus Torvalds 		sch->qstats.requeues++;
227*1da177e4SLinus Torvalds 	}
228*1da177e4SLinus Torvalds 
229*1da177e4SLinus Torvalds 	return ret;
230*1da177e4SLinus Torvalds }
231*1da177e4SLinus Torvalds 
232*1da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch)
233*1da177e4SLinus Torvalds {
234*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
235*1da177e4SLinus Torvalds 	unsigned int len;
236*1da177e4SLinus Torvalds 
237*1da177e4SLinus Torvalds 	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
238*1da177e4SLinus Torvalds 		sch->q.qlen--;
239*1da177e4SLinus Torvalds 		sch->qstats.drops++;
240*1da177e4SLinus Torvalds 	}
241*1da177e4SLinus Torvalds 	return len;
242*1da177e4SLinus Torvalds }
243*1da177e4SLinus Torvalds 
244*1da177e4SLinus Torvalds /* Dequeue packet.
245*1da177e4SLinus Torvalds  *  Move all packets that are ready to send from the delay holding
246*1da177e4SLinus Torvalds  *  list to the underlying qdisc, then just call dequeue
247*1da177e4SLinus Torvalds  */
248*1da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
249*1da177e4SLinus Torvalds {
250*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
251*1da177e4SLinus Torvalds 	struct sk_buff *skb;
252*1da177e4SLinus Torvalds 
253*1da177e4SLinus Torvalds 	skb = q->qdisc->dequeue(q->qdisc);
254*1da177e4SLinus Torvalds 	if (skb)
255*1da177e4SLinus Torvalds 		sch->q.qlen--;
256*1da177e4SLinus Torvalds 	return skb;
257*1da177e4SLinus Torvalds }
258*1da177e4SLinus Torvalds 
259*1da177e4SLinus Torvalds static void netem_watchdog(unsigned long arg)
260*1da177e4SLinus Torvalds {
261*1da177e4SLinus Torvalds 	struct Qdisc *sch = (struct Qdisc *)arg;
262*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
263*1da177e4SLinus Torvalds 	struct net_device *dev = sch->dev;
264*1da177e4SLinus Torvalds 	struct sk_buff *skb;
265*1da177e4SLinus Torvalds 	psched_time_t now;
266*1da177e4SLinus Torvalds 
267*1da177e4SLinus Torvalds 	pr_debug("netem_watchdog: fired @%lu\n", jiffies);
268*1da177e4SLinus Torvalds 
269*1da177e4SLinus Torvalds 	spin_lock_bh(&dev->queue_lock);
270*1da177e4SLinus Torvalds 	PSCHED_GET_TIME(now);
271*1da177e4SLinus Torvalds 
272*1da177e4SLinus Torvalds 	while ((skb = skb_peek(&q->delayed)) != NULL) {
273*1da177e4SLinus Torvalds 		const struct netem_skb_cb *cb
274*1da177e4SLinus Torvalds 			= (const struct netem_skb_cb *)skb->cb;
275*1da177e4SLinus Torvalds 		long delay
276*1da177e4SLinus Torvalds 			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
277*1da177e4SLinus Torvalds 		pr_debug("netem_watchdog: skb %p@%lu %ld\n",
278*1da177e4SLinus Torvalds 			 skb, jiffies, delay);
279*1da177e4SLinus Torvalds 
280*1da177e4SLinus Torvalds 		/* if more time remaining? */
281*1da177e4SLinus Torvalds 		if (delay > 0) {
282*1da177e4SLinus Torvalds 			mod_timer(&q->timer, jiffies + delay);
283*1da177e4SLinus Torvalds 			break;
284*1da177e4SLinus Torvalds 		}
285*1da177e4SLinus Torvalds 		__skb_unlink(skb, &q->delayed);
286*1da177e4SLinus Torvalds 
287*1da177e4SLinus Torvalds 		if (q->qdisc->enqueue(skb, q->qdisc)) {
288*1da177e4SLinus Torvalds 			sch->q.qlen--;
289*1da177e4SLinus Torvalds 			sch->qstats.drops++;
290*1da177e4SLinus Torvalds 		}
291*1da177e4SLinus Torvalds 	}
292*1da177e4SLinus Torvalds 	qdisc_run(dev);
293*1da177e4SLinus Torvalds 	spin_unlock_bh(&dev->queue_lock);
294*1da177e4SLinus Torvalds }
295*1da177e4SLinus Torvalds 
296*1da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
297*1da177e4SLinus Torvalds {
298*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
299*1da177e4SLinus Torvalds 
300*1da177e4SLinus Torvalds 	qdisc_reset(q->qdisc);
301*1da177e4SLinus Torvalds 	skb_queue_purge(&q->delayed);
302*1da177e4SLinus Torvalds 
303*1da177e4SLinus Torvalds 	sch->q.qlen = 0;
304*1da177e4SLinus Torvalds 	del_timer_sync(&q->timer);
305*1da177e4SLinus Torvalds }
306*1da177e4SLinus Torvalds 
307*1da177e4SLinus Torvalds static int set_fifo_limit(struct Qdisc *q, int limit)
308*1da177e4SLinus Torvalds {
309*1da177e4SLinus Torvalds         struct rtattr *rta;
310*1da177e4SLinus Torvalds 	int ret = -ENOMEM;
311*1da177e4SLinus Torvalds 
312*1da177e4SLinus Torvalds 	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
313*1da177e4SLinus Torvalds 	if (rta) {
314*1da177e4SLinus Torvalds 		rta->rta_type = RTM_NEWQDISC;
315*1da177e4SLinus Torvalds 		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
316*1da177e4SLinus Torvalds 		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
317*1da177e4SLinus Torvalds 
318*1da177e4SLinus Torvalds 		ret = q->ops->change(q, rta);
319*1da177e4SLinus Torvalds 		kfree(rta);
320*1da177e4SLinus Torvalds 	}
321*1da177e4SLinus Torvalds 	return ret;
322*1da177e4SLinus Torvalds }
323*1da177e4SLinus Torvalds 
324*1da177e4SLinus Torvalds /*
325*1da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
326*1da177e4SLinus Torvalds  * signed 16 bit values.
327*1da177e4SLinus Torvalds  */
328*1da177e4SLinus Torvalds static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
329*1da177e4SLinus Torvalds {
330*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
331*1da177e4SLinus Torvalds 	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
332*1da177e4SLinus Torvalds 	const __s16 *data = RTA_DATA(attr);
333*1da177e4SLinus Torvalds 	struct disttable *d;
334*1da177e4SLinus Torvalds 	int i;
335*1da177e4SLinus Torvalds 
336*1da177e4SLinus Torvalds 	if (n > 65536)
337*1da177e4SLinus Torvalds 		return -EINVAL;
338*1da177e4SLinus Torvalds 
339*1da177e4SLinus Torvalds 	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
340*1da177e4SLinus Torvalds 	if (!d)
341*1da177e4SLinus Torvalds 		return -ENOMEM;
342*1da177e4SLinus Torvalds 
343*1da177e4SLinus Torvalds 	d->size = n;
344*1da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
345*1da177e4SLinus Torvalds 		d->table[i] = data[i];
346*1da177e4SLinus Torvalds 
347*1da177e4SLinus Torvalds 	spin_lock_bh(&sch->dev->queue_lock);
348*1da177e4SLinus Torvalds 	d = xchg(&q->delay_dist, d);
349*1da177e4SLinus Torvalds 	spin_unlock_bh(&sch->dev->queue_lock);
350*1da177e4SLinus Torvalds 
351*1da177e4SLinus Torvalds 	kfree(d);
352*1da177e4SLinus Torvalds 	return 0;
353*1da177e4SLinus Torvalds }
354*1da177e4SLinus Torvalds 
355*1da177e4SLinus Torvalds static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
356*1da177e4SLinus Torvalds {
357*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
358*1da177e4SLinus Torvalds 	const struct tc_netem_corr *c = RTA_DATA(attr);
359*1da177e4SLinus Torvalds 
360*1da177e4SLinus Torvalds 	if (RTA_PAYLOAD(attr) != sizeof(*c))
361*1da177e4SLinus Torvalds 		return -EINVAL;
362*1da177e4SLinus Torvalds 
363*1da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
364*1da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
365*1da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
366*1da177e4SLinus Torvalds 	return 0;
367*1da177e4SLinus Torvalds }
368*1da177e4SLinus Torvalds 
369*1da177e4SLinus Torvalds static int netem_change(struct Qdisc *sch, struct rtattr *opt)
370*1da177e4SLinus Torvalds {
371*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
372*1da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
373*1da177e4SLinus Torvalds 	int ret;
374*1da177e4SLinus Torvalds 
375*1da177e4SLinus Torvalds 	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
376*1da177e4SLinus Torvalds 		return -EINVAL;
377*1da177e4SLinus Torvalds 
378*1da177e4SLinus Torvalds 	qopt = RTA_DATA(opt);
379*1da177e4SLinus Torvalds 	ret = set_fifo_limit(q->qdisc, qopt->limit);
380*1da177e4SLinus Torvalds 	if (ret) {
381*1da177e4SLinus Torvalds 		pr_debug("netem: can't set fifo limit\n");
382*1da177e4SLinus Torvalds 		return ret;
383*1da177e4SLinus Torvalds 	}
384*1da177e4SLinus Torvalds 
385*1da177e4SLinus Torvalds 	q->latency = qopt->latency;
386*1da177e4SLinus Torvalds 	q->jitter = qopt->jitter;
387*1da177e4SLinus Torvalds 	q->limit = qopt->limit;
388*1da177e4SLinus Torvalds 	q->gap = qopt->gap;
389*1da177e4SLinus Torvalds 	q->loss = qopt->loss;
390*1da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
391*1da177e4SLinus Torvalds 
392*1da177e4SLinus Torvalds 	/* Handle nested options after initial queue options.
393*1da177e4SLinus Torvalds 	 * Should have put all options in nested format but too late now.
394*1da177e4SLinus Torvalds 	 */
395*1da177e4SLinus Torvalds 	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
396*1da177e4SLinus Torvalds 		struct rtattr *tb[TCA_NETEM_MAX];
397*1da177e4SLinus Torvalds 		if (rtattr_parse(tb, TCA_NETEM_MAX,
398*1da177e4SLinus Torvalds 				 RTA_DATA(opt) + sizeof(*qopt),
399*1da177e4SLinus Torvalds 				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
400*1da177e4SLinus Torvalds 			return -EINVAL;
401*1da177e4SLinus Torvalds 
402*1da177e4SLinus Torvalds 		if (tb[TCA_NETEM_CORR-1]) {
403*1da177e4SLinus Torvalds 			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
404*1da177e4SLinus Torvalds 			if (ret)
405*1da177e4SLinus Torvalds 				return ret;
406*1da177e4SLinus Torvalds 		}
407*1da177e4SLinus Torvalds 
408*1da177e4SLinus Torvalds 		if (tb[TCA_NETEM_DELAY_DIST-1]) {
409*1da177e4SLinus Torvalds 			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
410*1da177e4SLinus Torvalds 			if (ret)
411*1da177e4SLinus Torvalds 				return ret;
412*1da177e4SLinus Torvalds 		}
413*1da177e4SLinus Torvalds 	}
414*1da177e4SLinus Torvalds 
415*1da177e4SLinus Torvalds 
416*1da177e4SLinus Torvalds 	return 0;
417*1da177e4SLinus Torvalds }
418*1da177e4SLinus Torvalds 
419*1da177e4SLinus Torvalds static int netem_init(struct Qdisc *sch, struct rtattr *opt)
420*1da177e4SLinus Torvalds {
421*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
422*1da177e4SLinus Torvalds 	int ret;
423*1da177e4SLinus Torvalds 
424*1da177e4SLinus Torvalds 	if (!opt)
425*1da177e4SLinus Torvalds 		return -EINVAL;
426*1da177e4SLinus Torvalds 
427*1da177e4SLinus Torvalds 	skb_queue_head_init(&q->delayed);
428*1da177e4SLinus Torvalds 	init_timer(&q->timer);
429*1da177e4SLinus Torvalds 	q->timer.function = netem_watchdog;
430*1da177e4SLinus Torvalds 	q->timer.data = (unsigned long) sch;
431*1da177e4SLinus Torvalds 	q->counter = 0;
432*1da177e4SLinus Torvalds 
433*1da177e4SLinus Torvalds 	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
434*1da177e4SLinus Torvalds 	if (!q->qdisc) {
435*1da177e4SLinus Torvalds 		pr_debug("netem: qdisc create failed\n");
436*1da177e4SLinus Torvalds 		return -ENOMEM;
437*1da177e4SLinus Torvalds 	}
438*1da177e4SLinus Torvalds 
439*1da177e4SLinus Torvalds 	ret = netem_change(sch, opt);
440*1da177e4SLinus Torvalds 	if (ret) {
441*1da177e4SLinus Torvalds 		pr_debug("netem: change failed\n");
442*1da177e4SLinus Torvalds 		qdisc_destroy(q->qdisc);
443*1da177e4SLinus Torvalds 	}
444*1da177e4SLinus Torvalds 	return ret;
445*1da177e4SLinus Torvalds }
446*1da177e4SLinus Torvalds 
447*1da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
448*1da177e4SLinus Torvalds {
449*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
450*1da177e4SLinus Torvalds 
451*1da177e4SLinus Torvalds 	del_timer_sync(&q->timer);
452*1da177e4SLinus Torvalds 	qdisc_destroy(q->qdisc);
453*1da177e4SLinus Torvalds 	kfree(q->delay_dist);
454*1da177e4SLinus Torvalds }
455*1da177e4SLinus Torvalds 
456*1da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
457*1da177e4SLinus Torvalds {
458*1da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
459*1da177e4SLinus Torvalds 	unsigned char	 *b = skb->tail;
460*1da177e4SLinus Torvalds 	struct rtattr *rta = (struct rtattr *) b;
461*1da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
462*1da177e4SLinus Torvalds 	struct tc_netem_corr cor;
463*1da177e4SLinus Torvalds 
464*1da177e4SLinus Torvalds 	qopt.latency = q->latency;
465*1da177e4SLinus Torvalds 	qopt.jitter = q->jitter;
466*1da177e4SLinus Torvalds 	qopt.limit = q->limit;
467*1da177e4SLinus Torvalds 	qopt.loss = q->loss;
468*1da177e4SLinus Torvalds 	qopt.gap = q->gap;
469*1da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
470*1da177e4SLinus Torvalds 	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
471*1da177e4SLinus Torvalds 
472*1da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
473*1da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
474*1da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
475*1da177e4SLinus Torvalds 	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
476*1da177e4SLinus Torvalds 	rta->rta_len = skb->tail - b;
477*1da177e4SLinus Torvalds 
478*1da177e4SLinus Torvalds 	return skb->len;
479*1da177e4SLinus Torvalds 
480*1da177e4SLinus Torvalds rtattr_failure:
481*1da177e4SLinus Torvalds 	skb_trim(skb, b - skb->data);
482*1da177e4SLinus Torvalds 	return -1;
483*1da177e4SLinus Torvalds }
484*1da177e4SLinus Torvalds 
485*1da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
486*1da177e4SLinus Torvalds 			  struct sk_buff *skb, struct tcmsg *tcm)
487*1da177e4SLinus Torvalds {
488*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
489*1da177e4SLinus Torvalds 
490*1da177e4SLinus Torvalds 	if (cl != 1) 	/* only one class */
491*1da177e4SLinus Torvalds 		return -ENOENT;
492*1da177e4SLinus Torvalds 
493*1da177e4SLinus Torvalds 	tcm->tcm_handle |= TC_H_MIN(1);
494*1da177e4SLinus Torvalds 	tcm->tcm_info = q->qdisc->handle;
495*1da177e4SLinus Torvalds 
496*1da177e4SLinus Torvalds 	return 0;
497*1da177e4SLinus Torvalds }
498*1da177e4SLinus Torvalds 
499*1da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
500*1da177e4SLinus Torvalds 		     struct Qdisc **old)
501*1da177e4SLinus Torvalds {
502*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
503*1da177e4SLinus Torvalds 
504*1da177e4SLinus Torvalds 	if (new == NULL)
505*1da177e4SLinus Torvalds 		new = &noop_qdisc;
506*1da177e4SLinus Torvalds 
507*1da177e4SLinus Torvalds 	sch_tree_lock(sch);
508*1da177e4SLinus Torvalds 	*old = xchg(&q->qdisc, new);
509*1da177e4SLinus Torvalds 	qdisc_reset(*old);
510*1da177e4SLinus Torvalds 	sch->q.qlen = 0;
511*1da177e4SLinus Torvalds 	sch_tree_unlock(sch);
512*1da177e4SLinus Torvalds 
513*1da177e4SLinus Torvalds 	return 0;
514*1da177e4SLinus Torvalds }
515*1da177e4SLinus Torvalds 
516*1da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
517*1da177e4SLinus Torvalds {
518*1da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
519*1da177e4SLinus Torvalds 	return q->qdisc;
520*1da177e4SLinus Torvalds }
521*1da177e4SLinus Torvalds 
522*1da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid)
523*1da177e4SLinus Torvalds {
524*1da177e4SLinus Torvalds 	return 1;
525*1da177e4SLinus Torvalds }
526*1da177e4SLinus Torvalds 
/* netem_put - counterpart of netem_get; does nothing */
static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}
530*1da177e4SLinus Torvalds 
531*1da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
532*1da177e4SLinus Torvalds 			    struct rtattr **tca, unsigned long *arg)
533*1da177e4SLinus Torvalds {
534*1da177e4SLinus Torvalds 	return -ENOSYS;
535*1da177e4SLinus Torvalds }
536*1da177e4SLinus Torvalds 
/* netem_delete - not supported, always fails with -ENOSYS */
static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}
541*1da177e4SLinus Torvalds 
542*1da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
543*1da177e4SLinus Torvalds {
544*1da177e4SLinus Torvalds 	if (!walker->stop) {
545*1da177e4SLinus Torvalds 		if (walker->count >= walker->skip)
546*1da177e4SLinus Torvalds 			if (walker->fn(sch, 1, walker) < 0) {
547*1da177e4SLinus Torvalds 				walker->stop = 1;
548*1da177e4SLinus Torvalds 				return;
549*1da177e4SLinus Torvalds 			}
550*1da177e4SLinus Torvalds 		walker->count++;
551*1da177e4SLinus Torvalds 	}
552*1da177e4SLinus Torvalds }
553*1da177e4SLinus Torvalds 
/* netem_find_tcf - filter chain lookup; always returns NULL */
static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}
558*1da177e4SLinus Torvalds 
559*1da177e4SLinus Torvalds static struct Qdisc_class_ops netem_class_ops = {
560*1da177e4SLinus Torvalds 	.graft		=	netem_graft,
561*1da177e4SLinus Torvalds 	.leaf		=	netem_leaf,
562*1da177e4SLinus Torvalds 	.get		=	netem_get,
563*1da177e4SLinus Torvalds 	.put		=	netem_put,
564*1da177e4SLinus Torvalds 	.change		=	netem_change_class,
565*1da177e4SLinus Torvalds 	.delete		=	netem_delete,
566*1da177e4SLinus Torvalds 	.walk		=	netem_walk,
567*1da177e4SLinus Torvalds 	.tcf_chain	=	netem_find_tcf,
568*1da177e4SLinus Torvalds 	.dump		=	netem_dump_class,
569*1da177e4SLinus Torvalds };
570*1da177e4SLinus Torvalds 
571*1da177e4SLinus Torvalds static struct Qdisc_ops netem_qdisc_ops = {
572*1da177e4SLinus Torvalds 	.id		=	"netem",
573*1da177e4SLinus Torvalds 	.cl_ops		=	&netem_class_ops,
574*1da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
575*1da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
576*1da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
577*1da177e4SLinus Torvalds 	.requeue	=	netem_requeue,
578*1da177e4SLinus Torvalds 	.drop		=	netem_drop,
579*1da177e4SLinus Torvalds 	.init		=	netem_init,
580*1da177e4SLinus Torvalds 	.reset		=	netem_reset,
581*1da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
582*1da177e4SLinus Torvalds 	.change		=	netem_change,
583*1da177e4SLinus Torvalds 	.dump		=	netem_dump,
584*1da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
585*1da177e4SLinus Torvalds };
586*1da177e4SLinus Torvalds 
587*1da177e4SLinus Torvalds 
588*1da177e4SLinus Torvalds static int __init netem_module_init(void)
589*1da177e4SLinus Torvalds {
590*1da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
591*1da177e4SLinus Torvalds }
592*1da177e4SLinus Torvalds static void __exit netem_module_exit(void)
593*1da177e4SLinus Torvalds {
594*1da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
595*1da177e4SLinus Torvalds }
596*1da177e4SLinus Torvalds module_init(netem_module_init)
597*1da177e4SLinus Torvalds module_exit(netem_module_exit)
598*1da177e4SLinus Torvalds MODULE_LICENSE("GPL");
599