11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * net/sched/sch_netem.c Network emulator 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 51da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 61da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 71da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Many of the algorithms and ideas for this came from 101da177e4SLinus Torvalds * NIST Net which is not copyrighted. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Authors: Stephen Hemminger <shemminger@osdl.org> 131da177e4SLinus Torvalds * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/config.h> 171da177e4SLinus Torvalds #include <linux/module.h> 181da177e4SLinus Torvalds #include <linux/bitops.h> 191da177e4SLinus Torvalds #include <linux/types.h> 201da177e4SLinus Torvalds #include <linux/kernel.h> 211da177e4SLinus Torvalds #include <linux/errno.h> 221da177e4SLinus Torvalds #include <linux/netdevice.h> 231da177e4SLinus Torvalds #include <linux/skbuff.h> 241da177e4SLinus Torvalds #include <linux/rtnetlink.h> 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds #include <net/pkt_sched.h> 271da177e4SLinus Torvalds 281da177e4SLinus Torvalds /* Network Emulation Queuing algorithm. 291da177e4SLinus Torvalds ==================================== 301da177e4SLinus Torvalds 311da177e4SLinus Torvalds Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 321da177e4SLinus Torvalds Network Emulation Tool 331da177e4SLinus Torvalds [2] Luigi Rizzo, DummyNet for FreeBSD 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds ---------------------------------------------------------------- 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds This started out as a simple way to delay outgoing packets to 381da177e4SLinus Torvalds test TCP but has grown to include most of the functionality 391da177e4SLinus Torvalds of a full blown network emulator like NISTnet. It can delay 401da177e4SLinus Torvalds packets and add random jitter (and correlation). The random 411da177e4SLinus Torvalds distribution can be loaded from a table as well to provide 421da177e4SLinus Torvalds normal, Pareto, or experimental curves. Packet loss, 431da177e4SLinus Torvalds duplication, and reordering can also be emulated. 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds This qdisc does not do classification that can be handled in 461da177e4SLinus Torvalds layering other disciplines. It does not need to do bandwidth 471da177e4SLinus Torvalds control either since that can be handled by using token 481da177e4SLinus Torvalds bucket or other rate control. 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds The simulator is limited by the Linux timer resolution 511da177e4SLinus Torvalds and will create packet bursts on the HZ boundary (1ms). 521da177e4SLinus Torvalds */ 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds struct netem_sched_data { 551da177e4SLinus Torvalds struct Qdisc *qdisc; 561da177e4SLinus Torvalds struct timer_list timer; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds u32 latency; 591da177e4SLinus Torvalds u32 loss; 601da177e4SLinus Torvalds u32 limit; 611da177e4SLinus Torvalds u32 counter; 621da177e4SLinus Torvalds u32 gap; 631da177e4SLinus Torvalds u32 jitter; 641da177e4SLinus Torvalds u32 duplicate; 650dca51d3SStephen Hemminger u32 reorder; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds struct crndstate { 681da177e4SLinus Torvalds unsigned long last; 691da177e4SLinus Torvalds unsigned long rho; 700dca51d3SStephen Hemminger } delay_cor, loss_cor, dup_cor, reorder_cor; 711da177e4SLinus Torvalds 721da177e4SLinus Torvalds struct disttable { 731da177e4SLinus Torvalds u32 size; 741da177e4SLinus Torvalds s16 table[0]; 751da177e4SLinus Torvalds } *delay_dist; 761da177e4SLinus Torvalds }; 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */ 791da177e4SLinus Torvalds struct netem_skb_cb { 801da177e4SLinus Torvalds psched_time_t time_to_send; 811da177e4SLinus Torvalds }; 821da177e4SLinus Torvalds 831da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator 841da177e4SLinus Torvalds * Use entropy source for initial seed. 851da177e4SLinus Torvalds */ 861da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 871da177e4SLinus Torvalds { 881da177e4SLinus Torvalds state->rho = rho; 891da177e4SLinus Torvalds state->last = net_random(); 901da177e4SLinus Torvalds } 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds /* get_crandom - correlated random number generator 931da177e4SLinus Torvalds * Next number depends on last value. 941da177e4SLinus Torvalds * rho is scaled to avoid floating point. 951da177e4SLinus Torvalds */ 961da177e4SLinus Torvalds static unsigned long get_crandom(struct crndstate *state) 971da177e4SLinus Torvalds { 981da177e4SLinus Torvalds u64 value, rho; 991da177e4SLinus Torvalds unsigned long answer; 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds if (state->rho == 0) /* no correllation */ 1021da177e4SLinus Torvalds return net_random(); 1031da177e4SLinus Torvalds 1041da177e4SLinus Torvalds value = net_random(); 1051da177e4SLinus Torvalds rho = (u64)state->rho + 1; 1061da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 1071da177e4SLinus Torvalds state->last = answer; 1081da177e4SLinus Torvalds return answer; 1091da177e4SLinus Torvalds } 1101da177e4SLinus Torvalds 1111da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and 1121da177e4SLinus Torvalds * std deviation sigma. Uses table lookup to approximate the desired 1131da177e4SLinus Torvalds * distribution, and a uniformly-distributed pseudo-random source. 1141da177e4SLinus Torvalds */ 1151da177e4SLinus Torvalds static long tabledist(unsigned long mu, long sigma, 1161da177e4SLinus Torvalds struct crndstate *state, const struct disttable *dist) 1171da177e4SLinus Torvalds { 1181da177e4SLinus Torvalds long t, x; 1191da177e4SLinus Torvalds unsigned long rnd; 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds if (sigma == 0) 1221da177e4SLinus Torvalds return mu; 1231da177e4SLinus Torvalds 1241da177e4SLinus Torvalds rnd = get_crandom(state); 1251da177e4SLinus Torvalds 1261da177e4SLinus Torvalds /* default uniform distribution */ 1271da177e4SLinus Torvalds if (dist == NULL) 1281da177e4SLinus Torvalds return (rnd % (2*sigma)) - sigma + mu; 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds t = dist->table[rnd % dist->size]; 1311da177e4SLinus Torvalds x = (sigma % NETEM_DIST_SCALE) * t; 1321da177e4SLinus Torvalds if (x >= 0) 1331da177e4SLinus Torvalds x += NETEM_DIST_SCALE/2; 1341da177e4SLinus Torvalds else 1351da177e4SLinus Torvalds x -= NETEM_DIST_SCALE/2; 1361da177e4SLinus Torvalds 1371da177e4SLinus Torvalds return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 1381da177e4SLinus Torvalds } 1391da177e4SLinus Torvalds 1400afb51e7SStephen Hemminger /* 1410afb51e7SStephen Hemminger * Insert one skb into qdisc. 1420afb51e7SStephen Hemminger * Note: parent depends on return value to account for queue length. 1430afb51e7SStephen Hemminger * NET_XMIT_DROP: queue length didn't change. 1440afb51e7SStephen Hemminger * NET_XMIT_SUCCESS: one skb was queued. 1450afb51e7SStephen Hemminger */ 1461da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 1471da177e4SLinus Torvalds { 1481da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 1490f9f32acSStephen Hemminger struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; 1500afb51e7SStephen Hemminger struct sk_buff *skb2; 1511da177e4SLinus Torvalds int ret; 1520afb51e7SStephen Hemminger int count = 1; 1531da177e4SLinus Torvalds 154771018e7SStephen Hemminger pr_debug("netem_enqueue skb=%p\n", skb); 1551da177e4SLinus Torvalds 1560afb51e7SStephen Hemminger /* Random duplication */ 1570afb51e7SStephen Hemminger if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 1580afb51e7SStephen Hemminger ++count; 1590afb51e7SStephen Hemminger 1601da177e4SLinus Torvalds /* Random packet drop 0 => none, ~0 => all */ 1610afb51e7SStephen Hemminger if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 1620afb51e7SStephen Hemminger --count; 1630afb51e7SStephen Hemminger 1640afb51e7SStephen Hemminger if (count == 0) { 1651da177e4SLinus Torvalds sch->qstats.drops++; 1661da177e4SLinus Torvalds kfree_skb(skb); 1670afb51e7SStephen Hemminger return NET_XMIT_DROP; 1681da177e4SLinus Torvalds } 1691da177e4SLinus Torvalds 1700afb51e7SStephen Hemminger /* 1710afb51e7SStephen Hemminger * If we need to duplicate packet, then re-insert at top of the 1720afb51e7SStephen Hemminger * qdisc tree, since parent queuer expects that only one 1730afb51e7SStephen Hemminger * skb will be queued. 174d5d75cd6SStephen Hemminger */ 1750afb51e7SStephen Hemminger if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 1760afb51e7SStephen Hemminger struct Qdisc *rootq = sch->dev->qdisc; 1770afb51e7SStephen Hemminger u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ 1780afb51e7SStephen Hemminger q->duplicate = 0; 179d5d75cd6SStephen Hemminger 1800afb51e7SStephen Hemminger rootq->enqueue(skb2, rootq); 1810afb51e7SStephen Hemminger q->duplicate = dupsave; 1821da177e4SLinus Torvalds } 1831da177e4SLinus Torvalds 1840dca51d3SStephen Hemminger if (q->gap == 0 /* not doing reordering */ 1850dca51d3SStephen Hemminger || q->counter < q->gap /* inside last reordering gap */ 1860dca51d3SStephen Hemminger || q->reorder < get_crandom(&q->reorder_cor)) { 1870f9f32acSStephen Hemminger psched_time_t now; 18807aaa115SStephen Hemminger psched_tdiff_t delay; 18907aaa115SStephen Hemminger 19007aaa115SStephen Hemminger delay = tabledist(q->latency, q->jitter, 19107aaa115SStephen Hemminger &q->delay_cor, q->delay_dist); 19207aaa115SStephen Hemminger 1930f9f32acSStephen Hemminger PSCHED_GET_TIME(now); 19407aaa115SStephen Hemminger PSCHED_TADD2(now, delay, cb->time_to_send); 1951da177e4SLinus Torvalds ++q->counter; 1961da177e4SLinus Torvalds ret = q->qdisc->enqueue(skb, q->qdisc); 1971da177e4SLinus Torvalds } else { 1980dca51d3SStephen Hemminger /* 1990dca51d3SStephen Hemminger * Do re-ordering by putting one out of N packets at the front 2000dca51d3SStephen Hemminger * of the queue. 2010dca51d3SStephen Hemminger */ 2020f9f32acSStephen Hemminger PSCHED_GET_TIME(cb->time_to_send); 2030dca51d3SStephen Hemminger q->counter = 0; 2040f9f32acSStephen Hemminger ret = q->qdisc->ops->requeue(skb, q->qdisc); 2051da177e4SLinus Torvalds } 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds if (likely(ret == NET_XMIT_SUCCESS)) { 2081da177e4SLinus Torvalds sch->q.qlen++; 2091da177e4SLinus Torvalds sch->bstats.bytes += skb->len; 2101da177e4SLinus Torvalds sch->bstats.packets++; 2111da177e4SLinus Torvalds } else 2121da177e4SLinus Torvalds sch->qstats.drops++; 2131da177e4SLinus Torvalds 214d5d75cd6SStephen Hemminger pr_debug("netem: enqueue ret %d\n", ret); 2151da177e4SLinus Torvalds return ret; 2161da177e4SLinus Torvalds } 2171da177e4SLinus Torvalds 2181da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */ 2191da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) 2201da177e4SLinus Torvalds { 2211da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2221da177e4SLinus Torvalds int ret; 2231da177e4SLinus Torvalds 2241da177e4SLinus Torvalds if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { 2251da177e4SLinus Torvalds sch->q.qlen++; 2261da177e4SLinus Torvalds sch->qstats.requeues++; 2271da177e4SLinus Torvalds } 2281da177e4SLinus Torvalds 2291da177e4SLinus Torvalds return ret; 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch) 2331da177e4SLinus Torvalds { 2341da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2351da177e4SLinus Torvalds unsigned int len; 2361da177e4SLinus Torvalds 2371da177e4SLinus Torvalds if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) { 2381da177e4SLinus Torvalds sch->q.qlen--; 2391da177e4SLinus Torvalds sch->qstats.drops++; 2401da177e4SLinus Torvalds } 2411da177e4SLinus Torvalds return len; 2421da177e4SLinus Torvalds } 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch) 2451da177e4SLinus Torvalds { 2461da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2471da177e4SLinus Torvalds struct sk_buff *skb; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds skb = q->qdisc->dequeue(q->qdisc); 250771018e7SStephen Hemminger if (skb) { 2510f9f32acSStephen Hemminger const struct netem_skb_cb *cb 2520f9f32acSStephen Hemminger = (const struct netem_skb_cb *)skb->cb; 2530f9f32acSStephen Hemminger psched_time_t now; 2540f9f32acSStephen Hemminger 2550f9f32acSStephen Hemminger /* if more time remaining? */ 2560f9f32acSStephen Hemminger PSCHED_GET_TIME(now); 25707aaa115SStephen Hemminger 25807aaa115SStephen Hemminger if (PSCHED_TLESS(cb->time_to_send, now)) { 259771018e7SStephen Hemminger pr_debug("netem_dequeue: return skb=%p\n", skb); 2601da177e4SLinus Torvalds sch->q.qlen--; 261771018e7SStephen Hemminger sch->flags &= ~TCQ_F_THROTTLED; 2620f9f32acSStephen Hemminger return skb; 26307aaa115SStephen Hemminger } else { 26407aaa115SStephen Hemminger psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); 26507aaa115SStephen Hemminger 26607aaa115SStephen Hemminger if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { 26707aaa115SStephen Hemminger sch->qstats.drops++; 26807aaa115SStephen Hemminger 26907aaa115SStephen Hemminger /* After this qlen is confused */ 27007aaa115SStephen Hemminger printk(KERN_ERR "netem: queue discpline %s could not requeue\n", 27107aaa115SStephen Hemminger q->qdisc->ops->id); 27207aaa115SStephen Hemminger 27307aaa115SStephen Hemminger sch->q.qlen--; 274771018e7SStephen Hemminger } 275771018e7SStephen Hemminger 27607aaa115SStephen Hemminger mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); 2770f9f32acSStephen Hemminger sch->flags |= TCQ_F_THROTTLED; 27807aaa115SStephen Hemminger } 2790f9f32acSStephen Hemminger } 2800f9f32acSStephen Hemminger 2810f9f32acSStephen Hemminger return NULL; 2821da177e4SLinus Torvalds } 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds static void netem_watchdog(unsigned long arg) 2851da177e4SLinus Torvalds { 2861da177e4SLinus Torvalds struct Qdisc *sch = (struct Qdisc *)arg; 2871da177e4SLinus Torvalds 288771018e7SStephen Hemminger pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen); 289771018e7SStephen Hemminger sch->flags &= ~TCQ_F_THROTTLED; 290771018e7SStephen Hemminger netif_schedule(sch->dev); 2911da177e4SLinus Torvalds } 2921da177e4SLinus Torvalds 2931da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch) 2941da177e4SLinus Torvalds { 2951da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2961da177e4SLinus Torvalds 2971da177e4SLinus Torvalds qdisc_reset(q->qdisc); 2981da177e4SLinus Torvalds sch->q.qlen = 0; 299771018e7SStephen Hemminger sch->flags &= ~TCQ_F_THROTTLED; 3001da177e4SLinus Torvalds del_timer_sync(&q->timer); 3011da177e4SLinus Torvalds } 3021da177e4SLinus Torvalds 303300ce174SStephen Hemminger /* Pass size change message down to embedded FIFO */ 3041da177e4SLinus Torvalds static int set_fifo_limit(struct Qdisc *q, int limit) 3051da177e4SLinus Torvalds { 3061da177e4SLinus Torvalds struct rtattr *rta; 3071da177e4SLinus Torvalds int ret = -ENOMEM; 3081da177e4SLinus Torvalds 309300ce174SStephen Hemminger /* Hack to avoid sending change message to non-FIFO */ 310300ce174SStephen Hemminger if (strncmp(q->ops->id + 1, "fifo", 4) != 0) 311300ce174SStephen Hemminger return 0; 312300ce174SStephen Hemminger 3131da177e4SLinus Torvalds rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 3141da177e4SLinus Torvalds if (rta) { 3151da177e4SLinus Torvalds rta->rta_type = RTM_NEWQDISC; 3161da177e4SLinus Torvalds rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 3171da177e4SLinus Torvalds ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 3181da177e4SLinus Torvalds 3191da177e4SLinus Torvalds ret = q->ops->change(q, rta); 3201da177e4SLinus Torvalds kfree(rta); 3211da177e4SLinus Torvalds } 3221da177e4SLinus Torvalds return ret; 3231da177e4SLinus Torvalds } 3241da177e4SLinus Torvalds 3251da177e4SLinus Torvalds /* 3261da177e4SLinus Torvalds * Distribution data is a variable size payload containing 3271da177e4SLinus Torvalds * signed 16 bit values. 3281da177e4SLinus Torvalds */ 3291da177e4SLinus Torvalds static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr) 3301da177e4SLinus Torvalds { 3311da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3321da177e4SLinus Torvalds unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16); 3331da177e4SLinus Torvalds const __s16 *data = RTA_DATA(attr); 3341da177e4SLinus Torvalds struct disttable *d; 3351da177e4SLinus Torvalds int i; 3361da177e4SLinus Torvalds 3371da177e4SLinus Torvalds if (n > 65536) 3381da177e4SLinus Torvalds return -EINVAL; 3391da177e4SLinus Torvalds 3401da177e4SLinus Torvalds d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 3411da177e4SLinus Torvalds if (!d) 3421da177e4SLinus Torvalds return -ENOMEM; 3431da177e4SLinus Torvalds 3441da177e4SLinus Torvalds d->size = n; 3451da177e4SLinus Torvalds for (i = 0; i < n; i++) 3461da177e4SLinus Torvalds d->table[i] = data[i]; 3471da177e4SLinus Torvalds 3481da177e4SLinus Torvalds spin_lock_bh(&sch->dev->queue_lock); 3491da177e4SLinus Torvalds d = xchg(&q->delay_dist, d); 3501da177e4SLinus Torvalds spin_unlock_bh(&sch->dev->queue_lock); 3511da177e4SLinus Torvalds 3521da177e4SLinus Torvalds kfree(d); 3531da177e4SLinus Torvalds return 0; 3541da177e4SLinus Torvalds } 3551da177e4SLinus Torvalds 3561da177e4SLinus Torvalds static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) 3571da177e4SLinus Torvalds { 3581da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3591da177e4SLinus Torvalds const struct tc_netem_corr *c = RTA_DATA(attr); 3601da177e4SLinus Torvalds 3611da177e4SLinus Torvalds if (RTA_PAYLOAD(attr) != sizeof(*c)) 3621da177e4SLinus Torvalds return -EINVAL; 3631da177e4SLinus Torvalds 3641da177e4SLinus Torvalds init_crandom(&q->delay_cor, c->delay_corr); 3651da177e4SLinus Torvalds init_crandom(&q->loss_cor, c->loss_corr); 3661da177e4SLinus Torvalds init_crandom(&q->dup_cor, c->dup_corr); 3671da177e4SLinus Torvalds return 0; 3681da177e4SLinus Torvalds } 3691da177e4SLinus Torvalds 3700dca51d3SStephen Hemminger static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) 3710dca51d3SStephen Hemminger { 3720dca51d3SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 3730dca51d3SStephen Hemminger const struct tc_netem_reorder *r = RTA_DATA(attr); 3740dca51d3SStephen Hemminger 3750dca51d3SStephen Hemminger if (RTA_PAYLOAD(attr) != sizeof(*r)) 3760dca51d3SStephen Hemminger return -EINVAL; 3770dca51d3SStephen Hemminger 3780dca51d3SStephen Hemminger q->reorder = r->probability; 3790dca51d3SStephen Hemminger init_crandom(&q->reorder_cor, r->correlation); 3800dca51d3SStephen Hemminger return 0; 3810dca51d3SStephen Hemminger } 3820dca51d3SStephen Hemminger 3831da177e4SLinus Torvalds static int netem_change(struct Qdisc *sch, struct rtattr *opt) 3841da177e4SLinus Torvalds { 3851da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3861da177e4SLinus Torvalds struct tc_netem_qopt *qopt; 3871da177e4SLinus Torvalds int ret; 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 3901da177e4SLinus Torvalds return -EINVAL; 3911da177e4SLinus Torvalds 3921da177e4SLinus Torvalds qopt = RTA_DATA(opt); 3931da177e4SLinus Torvalds ret = set_fifo_limit(q->qdisc, qopt->limit); 3941da177e4SLinus Torvalds if (ret) { 3951da177e4SLinus Torvalds pr_debug("netem: can't set fifo limit\n"); 3961da177e4SLinus Torvalds return ret; 3971da177e4SLinus Torvalds } 3981da177e4SLinus Torvalds 3991da177e4SLinus Torvalds q->latency = qopt->latency; 4001da177e4SLinus Torvalds q->jitter = qopt->jitter; 4011da177e4SLinus Torvalds q->limit = qopt->limit; 4021da177e4SLinus Torvalds q->gap = qopt->gap; 4030dca51d3SStephen Hemminger q->counter = 0; 4041da177e4SLinus Torvalds q->loss = qopt->loss; 4051da177e4SLinus Torvalds q->duplicate = qopt->duplicate; 4061da177e4SLinus Torvalds 4070dca51d3SStephen Hemminger /* for compatiablity with earlier versions. 4080dca51d3SStephen Hemminger * if gap is set, need to assume 100% probablity 4090dca51d3SStephen Hemminger */ 4100dca51d3SStephen Hemminger q->reorder = ~0; 4110dca51d3SStephen Hemminger 4121da177e4SLinus Torvalds /* Handle nested options after initial queue options. 4131da177e4SLinus Torvalds * Should have put all options in nested format but too late now. 4141da177e4SLinus Torvalds */ 4151da177e4SLinus Torvalds if (RTA_PAYLOAD(opt) > sizeof(*qopt)) { 4161da177e4SLinus Torvalds struct rtattr *tb[TCA_NETEM_MAX]; 4171da177e4SLinus Torvalds if (rtattr_parse(tb, TCA_NETEM_MAX, 4181da177e4SLinus Torvalds RTA_DATA(opt) + sizeof(*qopt), 4191da177e4SLinus Torvalds RTA_PAYLOAD(opt) - sizeof(*qopt))) 4201da177e4SLinus Torvalds return -EINVAL; 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds if (tb[TCA_NETEM_CORR-1]) { 4231da177e4SLinus Torvalds ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]); 4241da177e4SLinus Torvalds if (ret) 4251da177e4SLinus Torvalds return ret; 4261da177e4SLinus Torvalds } 4271da177e4SLinus Torvalds 4281da177e4SLinus Torvalds if (tb[TCA_NETEM_DELAY_DIST-1]) { 4291da177e4SLinus Torvalds ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]); 4301da177e4SLinus Torvalds if (ret) 4311da177e4SLinus Torvalds return ret; 4321da177e4SLinus Torvalds } 4330dca51d3SStephen Hemminger if (tb[TCA_NETEM_REORDER-1]) { 4340dca51d3SStephen Hemminger ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); 4350dca51d3SStephen Hemminger if (ret) 4360dca51d3SStephen Hemminger return ret; 4370dca51d3SStephen Hemminger } 4381da177e4SLinus Torvalds } 4391da177e4SLinus Torvalds 4401da177e4SLinus Torvalds 4411da177e4SLinus Torvalds return 0; 4421da177e4SLinus Torvalds } 4431da177e4SLinus Torvalds 444300ce174SStephen Hemminger /* 445300ce174SStephen Hemminger * Special case version of FIFO queue for use by netem. 446300ce174SStephen Hemminger * It queues in order based on timestamps in skb's 447300ce174SStephen Hemminger */ 448300ce174SStephen Hemminger struct fifo_sched_data { 449300ce174SStephen Hemminger u32 limit; 450300ce174SStephen Hemminger }; 451300ce174SStephen Hemminger 452300ce174SStephen Hemminger static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 453300ce174SStephen Hemminger { 454300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 455300ce174SStephen Hemminger struct sk_buff_head *list = &sch->q; 456300ce174SStephen Hemminger const struct netem_skb_cb *ncb 457300ce174SStephen Hemminger = (const struct netem_skb_cb *)nskb->cb; 458300ce174SStephen Hemminger struct sk_buff *skb; 459300ce174SStephen Hemminger 460300ce174SStephen Hemminger if (likely(skb_queue_len(list) < q->limit)) { 461300ce174SStephen Hemminger skb_queue_reverse_walk(list, skb) { 462300ce174SStephen Hemminger const struct netem_skb_cb *cb 463300ce174SStephen Hemminger = (const struct netem_skb_cb *)skb->cb; 464300ce174SStephen Hemminger 465300ce174SStephen Hemminger if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send)) 466300ce174SStephen Hemminger break; 467300ce174SStephen Hemminger } 468300ce174SStephen Hemminger 469300ce174SStephen Hemminger __skb_queue_after(list, skb, nskb); 470300ce174SStephen Hemminger 471300ce174SStephen Hemminger sch->qstats.backlog += nskb->len; 472300ce174SStephen Hemminger sch->bstats.bytes += nskb->len; 473300ce174SStephen Hemminger sch->bstats.packets++; 474300ce174SStephen Hemminger 475300ce174SStephen Hemminger return NET_XMIT_SUCCESS; 476300ce174SStephen Hemminger } 477300ce174SStephen Hemminger 478300ce174SStephen Hemminger return qdisc_drop(nskb, sch); 479300ce174SStephen Hemminger } 480300ce174SStephen Hemminger 481300ce174SStephen Hemminger static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) 482300ce174SStephen Hemminger { 483300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 484300ce174SStephen Hemminger 485300ce174SStephen Hemminger if (opt) { 486300ce174SStephen Hemminger struct tc_fifo_qopt *ctl = RTA_DATA(opt); 487300ce174SStephen Hemminger if (RTA_PAYLOAD(opt) < sizeof(*ctl)) 488300ce174SStephen Hemminger return -EINVAL; 489300ce174SStephen Hemminger 490300ce174SStephen Hemminger q->limit = ctl->limit; 491300ce174SStephen Hemminger } else 492300ce174SStephen Hemminger q->limit = max_t(u32, sch->dev->tx_queue_len, 1); 493300ce174SStephen Hemminger 494300ce174SStephen Hemminger return 0; 495300ce174SStephen Hemminger } 496300ce174SStephen Hemminger 497300ce174SStephen Hemminger static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) 498300ce174SStephen Hemminger { 499300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 500300ce174SStephen Hemminger struct tc_fifo_qopt opt = { .limit = q->limit }; 501300ce174SStephen Hemminger 502300ce174SStephen Hemminger RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 503300ce174SStephen Hemminger return skb->len; 504300ce174SStephen Hemminger 505300ce174SStephen Hemminger rtattr_failure: 506300ce174SStephen Hemminger return -1; 507300ce174SStephen Hemminger } 508300ce174SStephen Hemminger 509300ce174SStephen Hemminger static struct Qdisc_ops tfifo_qdisc_ops = { 510300ce174SStephen Hemminger .id = "tfifo", 511300ce174SStephen Hemminger .priv_size = sizeof(struct fifo_sched_data), 512300ce174SStephen Hemminger .enqueue = tfifo_enqueue, 513300ce174SStephen Hemminger .dequeue = qdisc_dequeue_head, 514300ce174SStephen Hemminger .requeue = qdisc_requeue, 515300ce174SStephen Hemminger .drop = qdisc_queue_drop, 516300ce174SStephen Hemminger .init = tfifo_init, 517300ce174SStephen Hemminger .reset = qdisc_reset_queue, 518300ce174SStephen Hemminger .change = tfifo_init, 519300ce174SStephen Hemminger .dump = tfifo_dump, 520300ce174SStephen Hemminger }; 521300ce174SStephen Hemminger 5221da177e4SLinus Torvalds static int netem_init(struct Qdisc *sch, struct rtattr *opt) 5231da177e4SLinus Torvalds { 5241da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5251da177e4SLinus Torvalds int ret; 5261da177e4SLinus Torvalds 5271da177e4SLinus Torvalds if (!opt) 5281da177e4SLinus Torvalds return -EINVAL; 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds init_timer(&q->timer); 5311da177e4SLinus Torvalds q->timer.function = netem_watchdog; 5321da177e4SLinus Torvalds q->timer.data = (unsigned long) sch; 5331da177e4SLinus Torvalds 534300ce174SStephen Hemminger q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); 5351da177e4SLinus Torvalds if (!q->qdisc) { 5361da177e4SLinus Torvalds pr_debug("netem: qdisc create failed\n"); 5371da177e4SLinus Torvalds return -ENOMEM; 5381da177e4SLinus Torvalds } 5391da177e4SLinus Torvalds 5401da177e4SLinus Torvalds ret = netem_change(sch, opt); 5411da177e4SLinus Torvalds if (ret) { 5421da177e4SLinus Torvalds pr_debug("netem: change failed\n"); 5431da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 5441da177e4SLinus Torvalds } 5451da177e4SLinus Torvalds return ret; 5461da177e4SLinus Torvalds } 5471da177e4SLinus Torvalds 5481da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch) 5491da177e4SLinus Torvalds { 5501da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5511da177e4SLinus Torvalds 5521da177e4SLinus Torvalds del_timer_sync(&q->timer); 5531da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 5541da177e4SLinus Torvalds kfree(q->delay_dist); 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 5581da177e4SLinus Torvalds { 5591da177e4SLinus Torvalds const struct netem_sched_data *q = qdisc_priv(sch); 5601da177e4SLinus Torvalds unsigned char *b = skb->tail; 5611da177e4SLinus Torvalds struct rtattr *rta = (struct rtattr *) b; 5621da177e4SLinus Torvalds struct tc_netem_qopt qopt; 5631da177e4SLinus Torvalds struct tc_netem_corr cor; 5640dca51d3SStephen Hemminger struct tc_netem_reorder reorder; 5651da177e4SLinus Torvalds 5661da177e4SLinus Torvalds qopt.latency = q->latency; 5671da177e4SLinus Torvalds qopt.jitter = q->jitter; 5681da177e4SLinus Torvalds qopt.limit = q->limit; 5691da177e4SLinus Torvalds qopt.loss = q->loss; 5701da177e4SLinus Torvalds qopt.gap = q->gap; 5711da177e4SLinus Torvalds qopt.duplicate = q->duplicate; 5721da177e4SLinus Torvalds RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 5731da177e4SLinus Torvalds 5741da177e4SLinus Torvalds cor.delay_corr = q->delay_cor.rho; 5751da177e4SLinus Torvalds cor.loss_corr = q->loss_cor.rho; 5761da177e4SLinus Torvalds cor.dup_corr = q->dup_cor.rho; 5771da177e4SLinus Torvalds RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 5780dca51d3SStephen Hemminger 5790dca51d3SStephen Hemminger reorder.probability = q->reorder; 5800dca51d3SStephen Hemminger reorder.correlation = q->reorder_cor.rho; 5810dca51d3SStephen Hemminger RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); 5820dca51d3SStephen Hemminger 5831da177e4SLinus Torvalds rta->rta_len = skb->tail - b; 5841da177e4SLinus Torvalds 5851da177e4SLinus Torvalds return skb->len; 5861da177e4SLinus Torvalds 5871da177e4SLinus Torvalds rtattr_failure: 5881da177e4SLinus Torvalds skb_trim(skb, b - skb->data); 5891da177e4SLinus Torvalds return -1; 5901da177e4SLinus Torvalds } 5911da177e4SLinus Torvalds 5921da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 5931da177e4SLinus Torvalds struct sk_buff *skb, struct tcmsg *tcm) 5941da177e4SLinus Torvalds { 5951da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5961da177e4SLinus Torvalds 5971da177e4SLinus Torvalds if (cl != 1) /* only one class */ 5981da177e4SLinus Torvalds return -ENOENT; 5991da177e4SLinus Torvalds 6001da177e4SLinus Torvalds tcm->tcm_handle |= TC_H_MIN(1); 6011da177e4SLinus Torvalds tcm->tcm_info = q->qdisc->handle; 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds return 0; 6041da177e4SLinus Torvalds } 6051da177e4SLinus Torvalds 6061da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 6071da177e4SLinus Torvalds struct Qdisc **old) 6081da177e4SLinus Torvalds { 6091da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6101da177e4SLinus Torvalds 6111da177e4SLinus Torvalds if (new == NULL) 6121da177e4SLinus Torvalds new = &noop_qdisc; 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds sch_tree_lock(sch); 6151da177e4SLinus Torvalds *old = xchg(&q->qdisc, new); 6161da177e4SLinus Torvalds qdisc_reset(*old); 6171da177e4SLinus Torvalds sch->q.qlen = 0; 6181da177e4SLinus Torvalds sch_tree_unlock(sch); 6191da177e4SLinus Torvalds 6201da177e4SLinus Torvalds return 0; 6211da177e4SLinus Torvalds } 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 6241da177e4SLinus Torvalds { 6251da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6261da177e4SLinus Torvalds return q->qdisc; 6271da177e4SLinus Torvalds } 6281da177e4SLinus Torvalds 6291da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid) 6301da177e4SLinus Torvalds { 6311da177e4SLinus Torvalds return 1; 6321da177e4SLinus Torvalds } 6331da177e4SLinus Torvalds 6341da177e4SLinus Torvalds static void netem_put(struct Qdisc *sch, unsigned long arg) 6351da177e4SLinus Torvalds { 6361da177e4SLinus Torvalds } 6371da177e4SLinus Torvalds 6381da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 6391da177e4SLinus Torvalds struct rtattr **tca, unsigned long *arg) 6401da177e4SLinus Torvalds { 6411da177e4SLinus Torvalds return -ENOSYS; 6421da177e4SLinus Torvalds } 6431da177e4SLinus Torvalds 6441da177e4SLinus Torvalds static int netem_delete(struct Qdisc *sch, unsigned long arg) 6451da177e4SLinus Torvalds { 6461da177e4SLinus Torvalds return -ENOSYS; 6471da177e4SLinus Torvalds } 6481da177e4SLinus Torvalds 6491da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 6501da177e4SLinus Torvalds { 6511da177e4SLinus Torvalds if (!walker->stop) { 6521da177e4SLinus Torvalds if (walker->count >= walker->skip) 6531da177e4SLinus Torvalds if (walker->fn(sch, 1, walker) < 0) { 6541da177e4SLinus Torvalds walker->stop = 1; 6551da177e4SLinus Torvalds return; 6561da177e4SLinus Torvalds } 6571da177e4SLinus Torvalds walker->count++; 6581da177e4SLinus Torvalds } 6591da177e4SLinus Torvalds } 6601da177e4SLinus Torvalds 6611da177e4SLinus Torvalds static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) 6621da177e4SLinus Torvalds { 6631da177e4SLinus Torvalds return NULL; 6641da177e4SLinus Torvalds } 6651da177e4SLinus Torvalds 6661da177e4SLinus Torvalds static struct Qdisc_class_ops netem_class_ops = { 6671da177e4SLinus Torvalds .graft = netem_graft, 6681da177e4SLinus Torvalds .leaf = netem_leaf, 6691da177e4SLinus Torvalds .get = netem_get, 6701da177e4SLinus Torvalds .put = netem_put, 6711da177e4SLinus Torvalds .change = netem_change_class, 6721da177e4SLinus Torvalds .delete = netem_delete, 6731da177e4SLinus Torvalds .walk = netem_walk, 6741da177e4SLinus Torvalds .tcf_chain = netem_find_tcf, 6751da177e4SLinus Torvalds .dump = netem_dump_class, 6761da177e4SLinus Torvalds }; 6771da177e4SLinus Torvalds 6781da177e4SLinus Torvalds static struct Qdisc_ops netem_qdisc_ops = { 6791da177e4SLinus Torvalds .id = "netem", 6801da177e4SLinus Torvalds .cl_ops = &netem_class_ops, 6811da177e4SLinus Torvalds .priv_size = sizeof(struct netem_sched_data), 6821da177e4SLinus Torvalds .enqueue = netem_enqueue, 6831da177e4SLinus Torvalds .dequeue = netem_dequeue, 6841da177e4SLinus Torvalds .requeue = netem_requeue, 6851da177e4SLinus Torvalds .drop = netem_drop, 6861da177e4SLinus Torvalds .init = netem_init, 6871da177e4SLinus Torvalds .reset = netem_reset, 6881da177e4SLinus Torvalds .destroy = netem_destroy, 6891da177e4SLinus Torvalds .change = netem_change, 6901da177e4SLinus Torvalds .dump = netem_dump, 6911da177e4SLinus Torvalds .owner = THIS_MODULE, 6921da177e4SLinus Torvalds }; 6931da177e4SLinus Torvalds 6941da177e4SLinus Torvalds 6951da177e4SLinus Torvalds static int __init netem_module_init(void) 6961da177e4SLinus Torvalds { 6971da177e4SLinus Torvalds return register_qdisc(&netem_qdisc_ops); 6981da177e4SLinus Torvalds } 6991da177e4SLinus Torvalds static void __exit netem_module_exit(void) 7001da177e4SLinus Torvalds { 7011da177e4SLinus Torvalds unregister_qdisc(&netem_qdisc_ops); 7021da177e4SLinus Torvalds } 7031da177e4SLinus Torvalds module_init(netem_module_init) 7041da177e4SLinus Torvalds module_exit(netem_module_exit) 7051da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 706