1*1da177e4SLinus Torvalds /* 2*1da177e4SLinus Torvalds * net/sched/sch_netem.c Network emulator 3*1da177e4SLinus Torvalds * 4*1da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 5*1da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 6*1da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 7*1da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 8*1da177e4SLinus Torvalds * 9*1da177e4SLinus Torvalds * Many of the algorithms and ideas for this came from 10*1da177e4SLinus Torvalds * NIST Net which is not copyrighted. 11*1da177e4SLinus Torvalds * 12*1da177e4SLinus Torvalds * Authors: Stephen Hemminger <shemminger@osdl.org> 13*1da177e4SLinus Torvalds * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 14*1da177e4SLinus Torvalds */ 15*1da177e4SLinus Torvalds 16*1da177e4SLinus Torvalds #include <linux/config.h> 17*1da177e4SLinus Torvalds #include <linux/module.h> 18*1da177e4SLinus Torvalds #include <linux/bitops.h> 19*1da177e4SLinus Torvalds #include <linux/types.h> 20*1da177e4SLinus Torvalds #include <linux/kernel.h> 21*1da177e4SLinus Torvalds #include <linux/errno.h> 22*1da177e4SLinus Torvalds #include <linux/netdevice.h> 23*1da177e4SLinus Torvalds #include <linux/skbuff.h> 24*1da177e4SLinus Torvalds #include <linux/rtnetlink.h> 25*1da177e4SLinus Torvalds 26*1da177e4SLinus Torvalds #include <net/pkt_sched.h> 27*1da177e4SLinus Torvalds 28*1da177e4SLinus Torvalds /* Network Emulation Queuing algorithm. 29*1da177e4SLinus Torvalds ==================================== 30*1da177e4SLinus Torvalds 31*1da177e4SLinus Torvalds Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 32*1da177e4SLinus Torvalds Network Emulation Tool 33*1da177e4SLinus Torvalds [2] Luigi Rizzo, DummyNet for FreeBSD 34*1da177e4SLinus Torvalds 35*1da177e4SLinus Torvalds ---------------------------------------------------------------- 36*1da177e4SLinus Torvalds 37*1da177e4SLinus Torvalds This started out as a simple way to delay outgoing packets to 38*1da177e4SLinus Torvalds test TCP but has grown to include most of the functionality 39*1da177e4SLinus Torvalds of a full blown network emulator like NISTnet. It can delay 40*1da177e4SLinus Torvalds packets and add random jitter (and correlation). The random 41*1da177e4SLinus Torvalds distribution can be loaded from a table as well to provide 42*1da177e4SLinus Torvalds normal, Pareto, or experimental curves. Packet loss, 43*1da177e4SLinus Torvalds duplication, and reordering can also be emulated. 44*1da177e4SLinus Torvalds 45*1da177e4SLinus Torvalds This qdisc does not do classification that can be handled in 46*1da177e4SLinus Torvalds layering other disciplines. It does not need to do bandwidth 47*1da177e4SLinus Torvalds control either since that can be handled by using token 48*1da177e4SLinus Torvalds bucket or other rate control. 49*1da177e4SLinus Torvalds 50*1da177e4SLinus Torvalds The simulator is limited by the Linux timer resolution 51*1da177e4SLinus Torvalds and will create packet bursts on the HZ boundary (1ms). 52*1da177e4SLinus Torvalds */ 53*1da177e4SLinus Torvalds 54*1da177e4SLinus Torvalds struct netem_sched_data { 55*1da177e4SLinus Torvalds struct Qdisc *qdisc; 56*1da177e4SLinus Torvalds struct sk_buff_head delayed; 57*1da177e4SLinus Torvalds struct timer_list timer; 58*1da177e4SLinus Torvalds 59*1da177e4SLinus Torvalds u32 latency; 60*1da177e4SLinus Torvalds u32 loss; 61*1da177e4SLinus Torvalds u32 limit; 62*1da177e4SLinus Torvalds u32 counter; 63*1da177e4SLinus Torvalds u32 gap; 64*1da177e4SLinus Torvalds u32 jitter; 65*1da177e4SLinus Torvalds u32 duplicate; 66*1da177e4SLinus Torvalds 67*1da177e4SLinus Torvalds struct crndstate { 68*1da177e4SLinus Torvalds unsigned long last; 69*1da177e4SLinus Torvalds unsigned long rho; 70*1da177e4SLinus Torvalds } delay_cor, loss_cor, dup_cor; 71*1da177e4SLinus Torvalds 72*1da177e4SLinus Torvalds struct disttable { 73*1da177e4SLinus Torvalds u32 size; 74*1da177e4SLinus Torvalds s16 table[0]; 75*1da177e4SLinus Torvalds } *delay_dist; 76*1da177e4SLinus Torvalds }; 77*1da177e4SLinus Torvalds 78*1da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */ 79*1da177e4SLinus Torvalds struct netem_skb_cb { 80*1da177e4SLinus Torvalds psched_time_t time_to_send; 81*1da177e4SLinus Torvalds }; 82*1da177e4SLinus Torvalds 83*1da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator 84*1da177e4SLinus Torvalds * Use entropy source for initial seed. 85*1da177e4SLinus Torvalds */ 86*1da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 87*1da177e4SLinus Torvalds { 88*1da177e4SLinus Torvalds state->rho = rho; 89*1da177e4SLinus Torvalds state->last = net_random(); 90*1da177e4SLinus Torvalds } 91*1da177e4SLinus Torvalds 92*1da177e4SLinus Torvalds /* get_crandom - correlated random number generator 93*1da177e4SLinus Torvalds * Next number depends on last value. 94*1da177e4SLinus Torvalds * rho is scaled to avoid floating point. 95*1da177e4SLinus Torvalds */ 96*1da177e4SLinus Torvalds static unsigned long get_crandom(struct crndstate *state) 97*1da177e4SLinus Torvalds { 98*1da177e4SLinus Torvalds u64 value, rho; 99*1da177e4SLinus Torvalds unsigned long answer; 100*1da177e4SLinus Torvalds 101*1da177e4SLinus Torvalds if (state->rho == 0) /* no correllation */ 102*1da177e4SLinus Torvalds return net_random(); 103*1da177e4SLinus Torvalds 104*1da177e4SLinus Torvalds value = net_random(); 105*1da177e4SLinus Torvalds rho = (u64)state->rho + 1; 106*1da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 107*1da177e4SLinus Torvalds state->last = answer; 108*1da177e4SLinus Torvalds return answer; 109*1da177e4SLinus Torvalds } 110*1da177e4SLinus Torvalds 111*1da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and 112*1da177e4SLinus Torvalds * std deviation sigma. Uses table lookup to approximate the desired 113*1da177e4SLinus Torvalds * distribution, and a uniformly-distributed pseudo-random source. 114*1da177e4SLinus Torvalds */ 115*1da177e4SLinus Torvalds static long tabledist(unsigned long mu, long sigma, 116*1da177e4SLinus Torvalds struct crndstate *state, const struct disttable *dist) 117*1da177e4SLinus Torvalds { 118*1da177e4SLinus Torvalds long t, x; 119*1da177e4SLinus Torvalds unsigned long rnd; 120*1da177e4SLinus Torvalds 121*1da177e4SLinus Torvalds if (sigma == 0) 122*1da177e4SLinus Torvalds return mu; 123*1da177e4SLinus Torvalds 124*1da177e4SLinus Torvalds rnd = get_crandom(state); 125*1da177e4SLinus Torvalds 126*1da177e4SLinus Torvalds /* default uniform distribution */ 127*1da177e4SLinus Torvalds if (dist == NULL) 128*1da177e4SLinus Torvalds return (rnd % (2*sigma)) - sigma + mu; 129*1da177e4SLinus Torvalds 130*1da177e4SLinus Torvalds t = dist->table[rnd % dist->size]; 131*1da177e4SLinus Torvalds x = (sigma % NETEM_DIST_SCALE) * t; 132*1da177e4SLinus Torvalds if (x >= 0) 133*1da177e4SLinus Torvalds x += NETEM_DIST_SCALE/2; 134*1da177e4SLinus Torvalds else 135*1da177e4SLinus Torvalds x -= NETEM_DIST_SCALE/2; 136*1da177e4SLinus Torvalds 137*1da177e4SLinus Torvalds return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 138*1da177e4SLinus Torvalds } 139*1da177e4SLinus Torvalds 140*1da177e4SLinus Torvalds /* Put skb in the private delayed queue. */ 141*1da177e4SLinus Torvalds static int delay_skb(struct Qdisc *sch, struct sk_buff *skb) 142*1da177e4SLinus Torvalds { 143*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 144*1da177e4SLinus Torvalds struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; 145*1da177e4SLinus Torvalds psched_tdiff_t td; 146*1da177e4SLinus Torvalds psched_time_t now; 147*1da177e4SLinus Torvalds 148*1da177e4SLinus Torvalds PSCHED_GET_TIME(now); 149*1da177e4SLinus Torvalds td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); 150*1da177e4SLinus Torvalds PSCHED_TADD2(now, td, cb->time_to_send); 151*1da177e4SLinus Torvalds 152*1da177e4SLinus Torvalds /* Always queue at tail to keep packets in order */ 153*1da177e4SLinus Torvalds if (likely(q->delayed.qlen < q->limit)) { 154*1da177e4SLinus Torvalds __skb_queue_tail(&q->delayed, skb); 155*1da177e4SLinus Torvalds if (!timer_pending(&q->timer)) { 156*1da177e4SLinus Torvalds q->timer.expires = jiffies + PSCHED_US2JIFFIE(td); 157*1da177e4SLinus Torvalds add_timer(&q->timer); 158*1da177e4SLinus Torvalds } 159*1da177e4SLinus Torvalds return NET_XMIT_SUCCESS; 160*1da177e4SLinus Torvalds } 161*1da177e4SLinus Torvalds 162*1da177e4SLinus Torvalds kfree_skb(skb); 163*1da177e4SLinus Torvalds return NET_XMIT_DROP; 164*1da177e4SLinus Torvalds } 165*1da177e4SLinus Torvalds 166*1da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 167*1da177e4SLinus Torvalds { 168*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 169*1da177e4SLinus Torvalds struct sk_buff *skb2; 170*1da177e4SLinus Torvalds int ret; 171*1da177e4SLinus Torvalds 172*1da177e4SLinus Torvalds pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies); 173*1da177e4SLinus Torvalds 174*1da177e4SLinus Torvalds /* Random packet drop 0 => none, ~0 => all */ 175*1da177e4SLinus Torvalds if (q->loss && q->loss >= get_crandom(&q->loss_cor)) { 176*1da177e4SLinus Torvalds pr_debug("netem_enqueue: random loss\n"); 177*1da177e4SLinus Torvalds sch->qstats.drops++; 178*1da177e4SLinus Torvalds kfree_skb(skb); 179*1da177e4SLinus Torvalds return 0; /* lie about loss so TCP doesn't know */ 180*1da177e4SLinus Torvalds } 181*1da177e4SLinus Torvalds 182*1da177e4SLinus Torvalds /* Random duplication */ 183*1da177e4SLinus Torvalds if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor) 184*1da177e4SLinus Torvalds && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 185*1da177e4SLinus Torvalds pr_debug("netem_enqueue: dup %p\n", skb2); 186*1da177e4SLinus Torvalds 187*1da177e4SLinus Torvalds if (delay_skb(sch, skb2)) { 188*1da177e4SLinus Torvalds sch->q.qlen++; 189*1da177e4SLinus Torvalds sch->bstats.bytes += skb2->len; 190*1da177e4SLinus Torvalds sch->bstats.packets++; 191*1da177e4SLinus Torvalds } else 192*1da177e4SLinus Torvalds sch->qstats.drops++; 193*1da177e4SLinus Torvalds } 194*1da177e4SLinus Torvalds 195*1da177e4SLinus Torvalds /* If doing simple delay then gap == 0 so all packets 196*1da177e4SLinus Torvalds * go into the delayed holding queue 197*1da177e4SLinus Torvalds * otherwise if doing out of order only "1 out of gap" 198*1da177e4SLinus Torvalds * packets will be delayed. 199*1da177e4SLinus Torvalds */ 200*1da177e4SLinus Torvalds if (q->counter < q->gap) { 201*1da177e4SLinus Torvalds ++q->counter; 202*1da177e4SLinus Torvalds ret = q->qdisc->enqueue(skb, q->qdisc); 203*1da177e4SLinus Torvalds } else { 204*1da177e4SLinus Torvalds q->counter = 0; 205*1da177e4SLinus Torvalds ret = delay_skb(sch, skb); 206*1da177e4SLinus Torvalds } 207*1da177e4SLinus Torvalds 208*1da177e4SLinus Torvalds if (likely(ret == NET_XMIT_SUCCESS)) { 209*1da177e4SLinus Torvalds sch->q.qlen++; 210*1da177e4SLinus Torvalds sch->bstats.bytes += skb->len; 211*1da177e4SLinus Torvalds sch->bstats.packets++; 212*1da177e4SLinus Torvalds } else 213*1da177e4SLinus Torvalds sch->qstats.drops++; 214*1da177e4SLinus Torvalds 215*1da177e4SLinus Torvalds return ret; 216*1da177e4SLinus Torvalds } 217*1da177e4SLinus Torvalds 218*1da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */ 219*1da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) 220*1da177e4SLinus Torvalds { 221*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 222*1da177e4SLinus Torvalds int ret; 223*1da177e4SLinus Torvalds 224*1da177e4SLinus Torvalds if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { 225*1da177e4SLinus Torvalds sch->q.qlen++; 226*1da177e4SLinus Torvalds sch->qstats.requeues++; 227*1da177e4SLinus Torvalds } 228*1da177e4SLinus Torvalds 229*1da177e4SLinus Torvalds return ret; 230*1da177e4SLinus Torvalds } 231*1da177e4SLinus Torvalds 232*1da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch) 233*1da177e4SLinus Torvalds { 234*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 235*1da177e4SLinus Torvalds unsigned int len; 236*1da177e4SLinus Torvalds 237*1da177e4SLinus Torvalds if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) { 238*1da177e4SLinus Torvalds sch->q.qlen--; 239*1da177e4SLinus Torvalds sch->qstats.drops++; 240*1da177e4SLinus Torvalds } 241*1da177e4SLinus Torvalds return len; 242*1da177e4SLinus Torvalds } 243*1da177e4SLinus Torvalds 244*1da177e4SLinus Torvalds /* Dequeue packet. 245*1da177e4SLinus Torvalds * Move all packets that are ready to send from the delay holding 246*1da177e4SLinus Torvalds * list to the underlying qdisc, then just call dequeue 247*1da177e4SLinus Torvalds */ 248*1da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch) 249*1da177e4SLinus Torvalds { 250*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 251*1da177e4SLinus Torvalds struct sk_buff *skb; 252*1da177e4SLinus Torvalds 253*1da177e4SLinus Torvalds skb = q->qdisc->dequeue(q->qdisc); 254*1da177e4SLinus Torvalds if (skb) 255*1da177e4SLinus Torvalds sch->q.qlen--; 256*1da177e4SLinus Torvalds return skb; 257*1da177e4SLinus Torvalds } 258*1da177e4SLinus Torvalds 259*1da177e4SLinus Torvalds static void netem_watchdog(unsigned long arg) 260*1da177e4SLinus Torvalds { 261*1da177e4SLinus Torvalds struct Qdisc *sch = (struct Qdisc *)arg; 262*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 263*1da177e4SLinus Torvalds struct net_device *dev = sch->dev; 264*1da177e4SLinus Torvalds struct sk_buff *skb; 265*1da177e4SLinus Torvalds psched_time_t now; 266*1da177e4SLinus Torvalds 267*1da177e4SLinus Torvalds pr_debug("netem_watchdog: fired @%lu\n", jiffies); 268*1da177e4SLinus Torvalds 269*1da177e4SLinus Torvalds spin_lock_bh(&dev->queue_lock); 270*1da177e4SLinus Torvalds PSCHED_GET_TIME(now); 271*1da177e4SLinus Torvalds 272*1da177e4SLinus Torvalds while ((skb = skb_peek(&q->delayed)) != NULL) { 273*1da177e4SLinus Torvalds const struct netem_skb_cb *cb 274*1da177e4SLinus Torvalds = (const struct netem_skb_cb *)skb->cb; 275*1da177e4SLinus Torvalds long delay 276*1da177e4SLinus Torvalds = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); 277*1da177e4SLinus Torvalds pr_debug("netem_watchdog: skb %p@%lu %ld\n", 278*1da177e4SLinus Torvalds skb, jiffies, delay); 279*1da177e4SLinus Torvalds 280*1da177e4SLinus Torvalds /* if more time remaining? */ 281*1da177e4SLinus Torvalds if (delay > 0) { 282*1da177e4SLinus Torvalds mod_timer(&q->timer, jiffies + delay); 283*1da177e4SLinus Torvalds break; 284*1da177e4SLinus Torvalds } 285*1da177e4SLinus Torvalds __skb_unlink(skb, &q->delayed); 286*1da177e4SLinus Torvalds 287*1da177e4SLinus Torvalds if (q->qdisc->enqueue(skb, q->qdisc)) { 288*1da177e4SLinus Torvalds sch->q.qlen--; 289*1da177e4SLinus Torvalds sch->qstats.drops++; 290*1da177e4SLinus Torvalds } 291*1da177e4SLinus Torvalds } 292*1da177e4SLinus Torvalds qdisc_run(dev); 293*1da177e4SLinus Torvalds spin_unlock_bh(&dev->queue_lock); 294*1da177e4SLinus Torvalds } 295*1da177e4SLinus Torvalds 296*1da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch) 297*1da177e4SLinus Torvalds { 298*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 299*1da177e4SLinus Torvalds 300*1da177e4SLinus Torvalds qdisc_reset(q->qdisc); 301*1da177e4SLinus Torvalds skb_queue_purge(&q->delayed); 302*1da177e4SLinus Torvalds 303*1da177e4SLinus Torvalds sch->q.qlen = 0; 304*1da177e4SLinus Torvalds del_timer_sync(&q->timer); 305*1da177e4SLinus Torvalds } 306*1da177e4SLinus Torvalds 307*1da177e4SLinus Torvalds static int set_fifo_limit(struct Qdisc *q, int limit) 308*1da177e4SLinus Torvalds { 309*1da177e4SLinus Torvalds struct rtattr *rta; 310*1da177e4SLinus Torvalds int ret = -ENOMEM; 311*1da177e4SLinus Torvalds 312*1da177e4SLinus Torvalds rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 313*1da177e4SLinus Torvalds if (rta) { 314*1da177e4SLinus Torvalds rta->rta_type = RTM_NEWQDISC; 315*1da177e4SLinus Torvalds rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 316*1da177e4SLinus Torvalds ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 317*1da177e4SLinus Torvalds 318*1da177e4SLinus Torvalds ret = q->ops->change(q, rta); 319*1da177e4SLinus Torvalds kfree(rta); 320*1da177e4SLinus Torvalds } 321*1da177e4SLinus Torvalds return ret; 322*1da177e4SLinus Torvalds } 323*1da177e4SLinus Torvalds 324*1da177e4SLinus Torvalds /* 325*1da177e4SLinus Torvalds * Distribution data is a variable size payload containing 326*1da177e4SLinus Torvalds * signed 16 bit values. 327*1da177e4SLinus Torvalds */ 328*1da177e4SLinus Torvalds static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr) 329*1da177e4SLinus Torvalds { 330*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 331*1da177e4SLinus Torvalds unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16); 332*1da177e4SLinus Torvalds const __s16 *data = RTA_DATA(attr); 333*1da177e4SLinus Torvalds struct disttable *d; 334*1da177e4SLinus Torvalds int i; 335*1da177e4SLinus Torvalds 336*1da177e4SLinus Torvalds if (n > 65536) 337*1da177e4SLinus Torvalds return -EINVAL; 338*1da177e4SLinus Torvalds 339*1da177e4SLinus Torvalds d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 340*1da177e4SLinus Torvalds if (!d) 341*1da177e4SLinus Torvalds return -ENOMEM; 342*1da177e4SLinus Torvalds 343*1da177e4SLinus Torvalds d->size = n; 344*1da177e4SLinus Torvalds for (i = 0; i < n; i++) 345*1da177e4SLinus Torvalds d->table[i] = data[i]; 346*1da177e4SLinus Torvalds 347*1da177e4SLinus Torvalds spin_lock_bh(&sch->dev->queue_lock); 348*1da177e4SLinus Torvalds d = xchg(&q->delay_dist, d); 349*1da177e4SLinus Torvalds spin_unlock_bh(&sch->dev->queue_lock); 350*1da177e4SLinus Torvalds 351*1da177e4SLinus Torvalds kfree(d); 352*1da177e4SLinus Torvalds return 0; 353*1da177e4SLinus Torvalds } 354*1da177e4SLinus Torvalds 355*1da177e4SLinus Torvalds static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) 356*1da177e4SLinus Torvalds { 357*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 358*1da177e4SLinus Torvalds const struct tc_netem_corr *c = RTA_DATA(attr); 359*1da177e4SLinus Torvalds 360*1da177e4SLinus Torvalds if (RTA_PAYLOAD(attr) != sizeof(*c)) 361*1da177e4SLinus Torvalds return -EINVAL; 362*1da177e4SLinus Torvalds 363*1da177e4SLinus Torvalds init_crandom(&q->delay_cor, c->delay_corr); 364*1da177e4SLinus Torvalds init_crandom(&q->loss_cor, c->loss_corr); 365*1da177e4SLinus Torvalds init_crandom(&q->dup_cor, c->dup_corr); 366*1da177e4SLinus Torvalds return 0; 367*1da177e4SLinus Torvalds } 368*1da177e4SLinus Torvalds 369*1da177e4SLinus Torvalds static int netem_change(struct Qdisc *sch, struct rtattr *opt) 370*1da177e4SLinus Torvalds { 371*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 372*1da177e4SLinus Torvalds struct tc_netem_qopt *qopt; 373*1da177e4SLinus Torvalds int ret; 374*1da177e4SLinus Torvalds 375*1da177e4SLinus Torvalds if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 376*1da177e4SLinus Torvalds return -EINVAL; 377*1da177e4SLinus Torvalds 378*1da177e4SLinus Torvalds qopt = RTA_DATA(opt); 379*1da177e4SLinus Torvalds ret = set_fifo_limit(q->qdisc, qopt->limit); 380*1da177e4SLinus Torvalds if (ret) { 381*1da177e4SLinus Torvalds pr_debug("netem: can't set fifo limit\n"); 382*1da177e4SLinus Torvalds return ret; 383*1da177e4SLinus Torvalds } 384*1da177e4SLinus Torvalds 385*1da177e4SLinus Torvalds q->latency = qopt->latency; 386*1da177e4SLinus Torvalds q->jitter = qopt->jitter; 387*1da177e4SLinus Torvalds q->limit = qopt->limit; 388*1da177e4SLinus Torvalds q->gap = qopt->gap; 389*1da177e4SLinus Torvalds q->loss = qopt->loss; 390*1da177e4SLinus Torvalds q->duplicate = qopt->duplicate; 391*1da177e4SLinus Torvalds 392*1da177e4SLinus Torvalds /* Handle nested options after initial queue options. 393*1da177e4SLinus Torvalds * Should have put all options in nested format but too late now. 394*1da177e4SLinus Torvalds */ 395*1da177e4SLinus Torvalds if (RTA_PAYLOAD(opt) > sizeof(*qopt)) { 396*1da177e4SLinus Torvalds struct rtattr *tb[TCA_NETEM_MAX]; 397*1da177e4SLinus Torvalds if (rtattr_parse(tb, TCA_NETEM_MAX, 398*1da177e4SLinus Torvalds RTA_DATA(opt) + sizeof(*qopt), 399*1da177e4SLinus Torvalds RTA_PAYLOAD(opt) - sizeof(*qopt))) 400*1da177e4SLinus Torvalds return -EINVAL; 401*1da177e4SLinus Torvalds 402*1da177e4SLinus Torvalds if (tb[TCA_NETEM_CORR-1]) { 403*1da177e4SLinus Torvalds ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]); 404*1da177e4SLinus Torvalds if (ret) 405*1da177e4SLinus Torvalds return ret; 406*1da177e4SLinus Torvalds } 407*1da177e4SLinus Torvalds 408*1da177e4SLinus Torvalds if (tb[TCA_NETEM_DELAY_DIST-1]) { 409*1da177e4SLinus Torvalds ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]); 410*1da177e4SLinus Torvalds if (ret) 411*1da177e4SLinus Torvalds return ret; 412*1da177e4SLinus Torvalds } 413*1da177e4SLinus Torvalds } 414*1da177e4SLinus Torvalds 415*1da177e4SLinus Torvalds 416*1da177e4SLinus Torvalds return 0; 417*1da177e4SLinus Torvalds } 418*1da177e4SLinus Torvalds 419*1da177e4SLinus Torvalds static int netem_init(struct Qdisc *sch, struct rtattr *opt) 420*1da177e4SLinus Torvalds { 421*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 422*1da177e4SLinus Torvalds int ret; 423*1da177e4SLinus Torvalds 424*1da177e4SLinus Torvalds if (!opt) 425*1da177e4SLinus Torvalds return -EINVAL; 426*1da177e4SLinus Torvalds 427*1da177e4SLinus Torvalds skb_queue_head_init(&q->delayed); 428*1da177e4SLinus Torvalds init_timer(&q->timer); 429*1da177e4SLinus Torvalds q->timer.function = netem_watchdog; 430*1da177e4SLinus Torvalds q->timer.data = (unsigned long) sch; 431*1da177e4SLinus Torvalds q->counter = 0; 432*1da177e4SLinus Torvalds 433*1da177e4SLinus Torvalds q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); 434*1da177e4SLinus Torvalds if (!q->qdisc) { 435*1da177e4SLinus Torvalds pr_debug("netem: qdisc create failed\n"); 436*1da177e4SLinus Torvalds return -ENOMEM; 437*1da177e4SLinus Torvalds } 438*1da177e4SLinus Torvalds 439*1da177e4SLinus Torvalds ret = netem_change(sch, opt); 440*1da177e4SLinus Torvalds if (ret) { 441*1da177e4SLinus Torvalds pr_debug("netem: change failed\n"); 442*1da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 443*1da177e4SLinus Torvalds } 444*1da177e4SLinus Torvalds return ret; 445*1da177e4SLinus Torvalds } 446*1da177e4SLinus Torvalds 447*1da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch) 448*1da177e4SLinus Torvalds { 449*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 450*1da177e4SLinus Torvalds 451*1da177e4SLinus Torvalds del_timer_sync(&q->timer); 452*1da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 453*1da177e4SLinus Torvalds kfree(q->delay_dist); 454*1da177e4SLinus Torvalds } 455*1da177e4SLinus Torvalds 456*1da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 457*1da177e4SLinus Torvalds { 458*1da177e4SLinus Torvalds const struct netem_sched_data *q = qdisc_priv(sch); 459*1da177e4SLinus Torvalds unsigned char *b = skb->tail; 460*1da177e4SLinus Torvalds struct rtattr *rta = (struct rtattr *) b; 461*1da177e4SLinus Torvalds struct tc_netem_qopt qopt; 462*1da177e4SLinus Torvalds struct tc_netem_corr cor; 463*1da177e4SLinus Torvalds 464*1da177e4SLinus Torvalds qopt.latency = q->latency; 465*1da177e4SLinus Torvalds qopt.jitter = q->jitter; 466*1da177e4SLinus Torvalds qopt.limit = q->limit; 467*1da177e4SLinus Torvalds qopt.loss = q->loss; 468*1da177e4SLinus Torvalds qopt.gap = q->gap; 469*1da177e4SLinus Torvalds qopt.duplicate = q->duplicate; 470*1da177e4SLinus Torvalds RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 471*1da177e4SLinus Torvalds 472*1da177e4SLinus Torvalds cor.delay_corr = q->delay_cor.rho; 473*1da177e4SLinus Torvalds cor.loss_corr = q->loss_cor.rho; 474*1da177e4SLinus Torvalds cor.dup_corr = q->dup_cor.rho; 475*1da177e4SLinus Torvalds RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 476*1da177e4SLinus Torvalds rta->rta_len = skb->tail - b; 477*1da177e4SLinus Torvalds 478*1da177e4SLinus Torvalds return skb->len; 479*1da177e4SLinus Torvalds 480*1da177e4SLinus Torvalds rtattr_failure: 481*1da177e4SLinus Torvalds skb_trim(skb, b - skb->data); 482*1da177e4SLinus Torvalds return -1; 483*1da177e4SLinus Torvalds } 484*1da177e4SLinus Torvalds 485*1da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 486*1da177e4SLinus Torvalds struct sk_buff *skb, struct tcmsg *tcm) 487*1da177e4SLinus Torvalds { 488*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 489*1da177e4SLinus Torvalds 490*1da177e4SLinus Torvalds if (cl != 1) /* only one class */ 491*1da177e4SLinus Torvalds return -ENOENT; 492*1da177e4SLinus Torvalds 493*1da177e4SLinus Torvalds tcm->tcm_handle |= TC_H_MIN(1); 494*1da177e4SLinus Torvalds tcm->tcm_info = q->qdisc->handle; 495*1da177e4SLinus Torvalds 496*1da177e4SLinus Torvalds return 0; 497*1da177e4SLinus Torvalds } 498*1da177e4SLinus Torvalds 499*1da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 500*1da177e4SLinus Torvalds struct Qdisc **old) 501*1da177e4SLinus Torvalds { 502*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 503*1da177e4SLinus Torvalds 504*1da177e4SLinus Torvalds if (new == NULL) 505*1da177e4SLinus Torvalds new = &noop_qdisc; 506*1da177e4SLinus Torvalds 507*1da177e4SLinus Torvalds sch_tree_lock(sch); 508*1da177e4SLinus Torvalds *old = xchg(&q->qdisc, new); 509*1da177e4SLinus Torvalds qdisc_reset(*old); 510*1da177e4SLinus Torvalds sch->q.qlen = 0; 511*1da177e4SLinus Torvalds sch_tree_unlock(sch); 512*1da177e4SLinus Torvalds 513*1da177e4SLinus Torvalds return 0; 514*1da177e4SLinus Torvalds } 515*1da177e4SLinus Torvalds 516*1da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 517*1da177e4SLinus Torvalds { 518*1da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 519*1da177e4SLinus Torvalds return q->qdisc; 520*1da177e4SLinus Torvalds } 521*1da177e4SLinus Torvalds 522*1da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid) 523*1da177e4SLinus Torvalds { 524*1da177e4SLinus Torvalds return 1; 525*1da177e4SLinus Torvalds } 526*1da177e4SLinus Torvalds 527*1da177e4SLinus Torvalds static void netem_put(struct Qdisc *sch, unsigned long arg) 528*1da177e4SLinus Torvalds { 529*1da177e4SLinus Torvalds } 530*1da177e4SLinus Torvalds 531*1da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 532*1da177e4SLinus Torvalds struct rtattr **tca, unsigned long *arg) 533*1da177e4SLinus Torvalds { 534*1da177e4SLinus Torvalds return -ENOSYS; 535*1da177e4SLinus Torvalds } 536*1da177e4SLinus Torvalds 537*1da177e4SLinus Torvalds static int netem_delete(struct Qdisc *sch, unsigned long arg) 538*1da177e4SLinus Torvalds { 539*1da177e4SLinus Torvalds return -ENOSYS; 540*1da177e4SLinus Torvalds } 541*1da177e4SLinus Torvalds 542*1da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 543*1da177e4SLinus Torvalds { 544*1da177e4SLinus Torvalds if (!walker->stop) { 545*1da177e4SLinus Torvalds if (walker->count >= walker->skip) 546*1da177e4SLinus Torvalds if (walker->fn(sch, 1, walker) < 0) { 547*1da177e4SLinus Torvalds walker->stop = 1; 548*1da177e4SLinus Torvalds return; 549*1da177e4SLinus Torvalds } 550*1da177e4SLinus Torvalds walker->count++; 551*1da177e4SLinus Torvalds } 552*1da177e4SLinus Torvalds } 553*1da177e4SLinus Torvalds 554*1da177e4SLinus Torvalds static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) 555*1da177e4SLinus Torvalds { 556*1da177e4SLinus Torvalds return NULL; 557*1da177e4SLinus Torvalds } 558*1da177e4SLinus Torvalds 559*1da177e4SLinus Torvalds static struct Qdisc_class_ops netem_class_ops = { 560*1da177e4SLinus Torvalds .graft = netem_graft, 561*1da177e4SLinus Torvalds .leaf = netem_leaf, 562*1da177e4SLinus Torvalds .get = netem_get, 563*1da177e4SLinus Torvalds .put = netem_put, 564*1da177e4SLinus Torvalds .change = netem_change_class, 565*1da177e4SLinus Torvalds .delete = netem_delete, 566*1da177e4SLinus Torvalds .walk = netem_walk, 567*1da177e4SLinus Torvalds .tcf_chain = netem_find_tcf, 568*1da177e4SLinus Torvalds .dump = netem_dump_class, 569*1da177e4SLinus Torvalds }; 570*1da177e4SLinus Torvalds 571*1da177e4SLinus Torvalds static struct Qdisc_ops netem_qdisc_ops = { 572*1da177e4SLinus Torvalds .id = "netem", 573*1da177e4SLinus Torvalds .cl_ops = &netem_class_ops, 574*1da177e4SLinus Torvalds .priv_size = sizeof(struct netem_sched_data), 575*1da177e4SLinus Torvalds .enqueue = netem_enqueue, 576*1da177e4SLinus Torvalds .dequeue = netem_dequeue, 577*1da177e4SLinus Torvalds .requeue = netem_requeue, 578*1da177e4SLinus Torvalds .drop = netem_drop, 579*1da177e4SLinus Torvalds .init = netem_init, 580*1da177e4SLinus Torvalds .reset = netem_reset, 581*1da177e4SLinus Torvalds .destroy = netem_destroy, 582*1da177e4SLinus Torvalds .change = netem_change, 583*1da177e4SLinus Torvalds .dump = netem_dump, 584*1da177e4SLinus Torvalds .owner = THIS_MODULE, 585*1da177e4SLinus Torvalds }; 586*1da177e4SLinus Torvalds 587*1da177e4SLinus Torvalds 588*1da177e4SLinus Torvalds static int __init netem_module_init(void) 589*1da177e4SLinus Torvalds { 590*1da177e4SLinus Torvalds return register_qdisc(&netem_qdisc_ops); 591*1da177e4SLinus Torvalds } 592*1da177e4SLinus Torvalds static void __exit netem_module_exit(void) 593*1da177e4SLinus Torvalds { 594*1da177e4SLinus Torvalds unregister_qdisc(&netem_qdisc_ops); 595*1da177e4SLinus Torvalds } 596*1da177e4SLinus Torvalds module_init(netem_module_init) 597*1da177e4SLinus Torvalds module_exit(netem_module_exit) 598*1da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 599