11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * net/sched/sch_netem.c Network emulator 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 51da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 61da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 7798b6b19SStephen Hemminger * 2 of the License. 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Many of the algorithms and ideas for this came from 101da177e4SLinus Torvalds * NIST Net which is not copyrighted. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Authors: Stephen Hemminger <shemminger@osdl.org> 131da177e4SLinus Torvalds * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/module.h> 171da177e4SLinus Torvalds #include <linux/types.h> 181da177e4SLinus Torvalds #include <linux/kernel.h> 191da177e4SLinus Torvalds #include <linux/errno.h> 201da177e4SLinus Torvalds #include <linux/skbuff.h> 211da177e4SLinus Torvalds #include <linux/rtnetlink.h> 221da177e4SLinus Torvalds 23dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h> 241da177e4SLinus Torvalds #include <net/pkt_sched.h> 251da177e4SLinus Torvalds 26c865e5d9SStephen Hemminger #define VERSION "1.2" 27eb229c4cSStephen Hemminger 281da177e4SLinus Torvalds /* Network Emulation Queuing algorithm. 
   ====================================

   Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
	    [2] Luigi Rizzo, DummyNet for FreeBSD

   ----------------------------------------------------------------

   This started out as a simple way to delay outgoing packets to
   test TCP but has grown to include most of the functionality
   of a full-blown network emulator like NISTnet. It can delay
   packets and add random jitter (and correlation). The random
   distribution can be loaded from a table as well to provide
   normal, Pareto, or experimental curves. Packet loss,
   duplication, reordering, and corruption can also be emulated.

   This qdisc does not do classification; that can be handled by
   layering other disciplines.  It does not need to do bandwidth
   control either, since that can be handled by using a token
   bucket or other rate control.

   The simulator is limited by the Linux timer resolution
   and will create packet bursts on the HZ boundary (1ms).
521da177e4SLinus Torvalds */ 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds struct netem_sched_data { 551da177e4SLinus Torvalds struct Qdisc *qdisc; 5659cb5c67SPatrick McHardy struct qdisc_watchdog watchdog; 571da177e4SLinus Torvalds 58b407621cSStephen Hemminger psched_tdiff_t latency; 59b407621cSStephen Hemminger psched_tdiff_t jitter; 60b407621cSStephen Hemminger 611da177e4SLinus Torvalds u32 loss; 621da177e4SLinus Torvalds u32 limit; 631da177e4SLinus Torvalds u32 counter; 641da177e4SLinus Torvalds u32 gap; 651da177e4SLinus Torvalds u32 duplicate; 660dca51d3SStephen Hemminger u32 reorder; 67c865e5d9SStephen Hemminger u32 corrupt; 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds struct crndstate { 70b407621cSStephen Hemminger u32 last; 71b407621cSStephen Hemminger u32 rho; 72c865e5d9SStephen Hemminger } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds struct disttable { 751da177e4SLinus Torvalds u32 size; 761da177e4SLinus Torvalds s16 table[0]; 771da177e4SLinus Torvalds } *delay_dist; 781da177e4SLinus Torvalds }; 791da177e4SLinus Torvalds 801da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */ 811da177e4SLinus Torvalds struct netem_skb_cb { 821da177e4SLinus Torvalds psched_time_t time_to_send; 831da177e4SLinus Torvalds }; 841da177e4SLinus Torvalds 855f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 865f86173bSJussi Kivilinna { 87175f9c1bSJussi Kivilinna BUILD_BUG_ON(sizeof(skb->cb) < 88175f9c1bSJussi Kivilinna sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); 89175f9c1bSJussi Kivilinna return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 905f86173bSJussi Kivilinna } 915f86173bSJussi Kivilinna 921da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator 931da177e4SLinus Torvalds * Use entropy source for initial seed. 
941da177e4SLinus Torvalds */ 951da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 961da177e4SLinus Torvalds { 971da177e4SLinus Torvalds state->rho = rho; 981da177e4SLinus Torvalds state->last = net_random(); 991da177e4SLinus Torvalds } 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds /* get_crandom - correlated random number generator 1021da177e4SLinus Torvalds * Next number depends on last value. 1031da177e4SLinus Torvalds * rho is scaled to avoid floating point. 1041da177e4SLinus Torvalds */ 105b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state) 1061da177e4SLinus Torvalds { 1071da177e4SLinus Torvalds u64 value, rho; 1081da177e4SLinus Torvalds unsigned long answer; 1091da177e4SLinus Torvalds 110bb2f8cc0SStephen Hemminger if (state->rho == 0) /* no correlation */ 1111da177e4SLinus Torvalds return net_random(); 1121da177e4SLinus Torvalds 1131da177e4SLinus Torvalds value = net_random(); 1141da177e4SLinus Torvalds rho = (u64)state->rho + 1; 1151da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 1161da177e4SLinus Torvalds state->last = answer; 1171da177e4SLinus Torvalds return answer; 1181da177e4SLinus Torvalds } 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and 1211da177e4SLinus Torvalds * std deviation sigma. Uses table lookup to approximate the desired 1221da177e4SLinus Torvalds * distribution, and a uniformly-distributed pseudo-random source. 
1231da177e4SLinus Torvalds */ 124b407621cSStephen Hemminger static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 125b407621cSStephen Hemminger struct crndstate *state, 126b407621cSStephen Hemminger const struct disttable *dist) 1271da177e4SLinus Torvalds { 128b407621cSStephen Hemminger psched_tdiff_t x; 129b407621cSStephen Hemminger long t; 130b407621cSStephen Hemminger u32 rnd; 1311da177e4SLinus Torvalds 1321da177e4SLinus Torvalds if (sigma == 0) 1331da177e4SLinus Torvalds return mu; 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds rnd = get_crandom(state); 1361da177e4SLinus Torvalds 1371da177e4SLinus Torvalds /* default uniform distribution */ 1381da177e4SLinus Torvalds if (dist == NULL) 1391da177e4SLinus Torvalds return (rnd % (2*sigma)) - sigma + mu; 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds t = dist->table[rnd % dist->size]; 1421da177e4SLinus Torvalds x = (sigma % NETEM_DIST_SCALE) * t; 1431da177e4SLinus Torvalds if (x >= 0) 1441da177e4SLinus Torvalds x += NETEM_DIST_SCALE/2; 1451da177e4SLinus Torvalds else 1461da177e4SLinus Torvalds x -= NETEM_DIST_SCALE/2; 1471da177e4SLinus Torvalds 1481da177e4SLinus Torvalds return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds 1510afb51e7SStephen Hemminger /* 1520afb51e7SStephen Hemminger * Insert one skb into qdisc. 1530afb51e7SStephen Hemminger * Note: parent depends on return value to account for queue length. 1540afb51e7SStephen Hemminger * NET_XMIT_DROP: queue length didn't change. 1550afb51e7SStephen Hemminger * NET_XMIT_SUCCESS: one skb was queued. 
1560afb51e7SStephen Hemminger */ 1571da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 1581da177e4SLinus Torvalds { 1591da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 16089e1df74SGuillaume Chazarain /* We don't fill cb now as skb_unshare() may invalidate it */ 16189e1df74SGuillaume Chazarain struct netem_skb_cb *cb; 1620afb51e7SStephen Hemminger struct sk_buff *skb2; 1631da177e4SLinus Torvalds int ret; 1640afb51e7SStephen Hemminger int count = 1; 1651da177e4SLinus Torvalds 166771018e7SStephen Hemminger pr_debug("netem_enqueue skb=%p\n", skb); 1671da177e4SLinus Torvalds 1680afb51e7SStephen Hemminger /* Random duplication */ 1690afb51e7SStephen Hemminger if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 1700afb51e7SStephen Hemminger ++count; 1710afb51e7SStephen Hemminger 1721da177e4SLinus Torvalds /* Random packet drop 0 => none, ~0 => all */ 1730afb51e7SStephen Hemminger if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 1740afb51e7SStephen Hemminger --count; 1750afb51e7SStephen Hemminger 1760afb51e7SStephen Hemminger if (count == 0) { 1771da177e4SLinus Torvalds sch->qstats.drops++; 1781da177e4SLinus Torvalds kfree_skb(skb); 179c27f339aSJarek Poplawski return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 1801da177e4SLinus Torvalds } 1811da177e4SLinus Torvalds 1824e8a5201SDavid S. Miller skb_orphan(skb); 1834e8a5201SDavid S. Miller 1840afb51e7SStephen Hemminger /* 1850afb51e7SStephen Hemminger * If we need to duplicate packet, then re-insert at top of the 1860afb51e7SStephen Hemminger * qdisc tree, since parent queuer expects that only one 1870afb51e7SStephen Hemminger * skb will be queued. 188d5d75cd6SStephen Hemminger */ 1890afb51e7SStephen Hemminger if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 1907698b4fcSDavid S. Miller struct Qdisc *rootq = qdisc_root(sch); 1910afb51e7SStephen Hemminger u32 dupsave = q->duplicate; /* prevent duplicating a dup... 
*/ 1920afb51e7SStephen Hemminger q->duplicate = 0; 193d5d75cd6SStephen Hemminger 1945f86173bSJussi Kivilinna qdisc_enqueue_root(skb2, rootq); 1950afb51e7SStephen Hemminger q->duplicate = dupsave; 1961da177e4SLinus Torvalds } 1971da177e4SLinus Torvalds 198c865e5d9SStephen Hemminger /* 199c865e5d9SStephen Hemminger * Randomized packet corruption. 200c865e5d9SStephen Hemminger * Make copy if needed since we are modifying 201c865e5d9SStephen Hemminger * If packet is going to be hardware checksummed, then 202c865e5d9SStephen Hemminger * do it now in software before we mangle it. 203c865e5d9SStephen Hemminger */ 204c865e5d9SStephen Hemminger if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 205c865e5d9SStephen Hemminger if (!(skb = skb_unshare(skb, GFP_ATOMIC)) 20684fa7933SPatrick McHardy || (skb->ip_summed == CHECKSUM_PARTIAL 20784fa7933SPatrick McHardy && skb_checksum_help(skb))) { 208c865e5d9SStephen Hemminger sch->qstats.drops++; 209c865e5d9SStephen Hemminger return NET_XMIT_DROP; 210c865e5d9SStephen Hemminger } 211c865e5d9SStephen Hemminger 212c865e5d9SStephen Hemminger skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); 213c865e5d9SStephen Hemminger } 214c865e5d9SStephen Hemminger 2155f86173bSJussi Kivilinna cb = netem_skb_cb(skb); 2160dca51d3SStephen Hemminger if (q->gap == 0 /* not doing reordering */ 2170dca51d3SStephen Hemminger || q->counter < q->gap /* inside last reordering gap */ 2180dca51d3SStephen Hemminger || q->reorder < get_crandom(&q->reorder_cor)) { 2190f9f32acSStephen Hemminger psched_time_t now; 22007aaa115SStephen Hemminger psched_tdiff_t delay; 22107aaa115SStephen Hemminger 22207aaa115SStephen Hemminger delay = tabledist(q->latency, q->jitter, 22307aaa115SStephen Hemminger &q->delay_cor, q->delay_dist); 22407aaa115SStephen Hemminger 2253bebcda2SPatrick McHardy now = psched_get_time(); 2267c59e25fSPatrick McHardy cb->time_to_send = now + delay; 2271da177e4SLinus Torvalds ++q->counter; 2285f86173bSJussi Kivilinna ret 
= qdisc_enqueue(skb, q->qdisc); 2291da177e4SLinus Torvalds } else { 2300dca51d3SStephen Hemminger /* 2310dca51d3SStephen Hemminger * Do re-ordering by putting one out of N packets at the front 2320dca51d3SStephen Hemminger * of the queue. 2330dca51d3SStephen Hemminger */ 2343bebcda2SPatrick McHardy cb->time_to_send = psched_get_time(); 2350dca51d3SStephen Hemminger q->counter = 0; 2360f9f32acSStephen Hemminger ret = q->qdisc->ops->requeue(skb, q->qdisc); 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds if (likely(ret == NET_XMIT_SUCCESS)) { 2401da177e4SLinus Torvalds sch->q.qlen++; 2410abf77e5SJussi Kivilinna sch->bstats.bytes += qdisc_pkt_len(skb); 2421da177e4SLinus Torvalds sch->bstats.packets++; 243378a2f09SJarek Poplawski } else if (net_xmit_drop_count(ret)) { 2441da177e4SLinus Torvalds sch->qstats.drops++; 245378a2f09SJarek Poplawski } 2461da177e4SLinus Torvalds 247d5d75cd6SStephen Hemminger pr_debug("netem: enqueue ret %d\n", ret); 2481da177e4SLinus Torvalds return ret; 2491da177e4SLinus Torvalds } 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */ 2521da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) 2531da177e4SLinus Torvalds { 2541da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2551da177e4SLinus Torvalds int ret; 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { 2581da177e4SLinus Torvalds sch->q.qlen++; 2591da177e4SLinus Torvalds sch->qstats.requeues++; 2601da177e4SLinus Torvalds } 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds return ret; 2631da177e4SLinus Torvalds } 2641da177e4SLinus Torvalds 2651da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch) 2661da177e4SLinus Torvalds { 2671da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2686d037a26SPatrick McHardy unsigned int len = 0; 2691da177e4SLinus Torvalds 
2706d037a26SPatrick McHardy if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { 2711da177e4SLinus Torvalds sch->q.qlen--; 2721da177e4SLinus Torvalds sch->qstats.drops++; 2731da177e4SLinus Torvalds } 2741da177e4SLinus Torvalds return len; 2751da177e4SLinus Torvalds } 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch) 2781da177e4SLinus Torvalds { 2791da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2801da177e4SLinus Torvalds struct sk_buff *skb; 2811da177e4SLinus Torvalds 28211274e5aSStephen Hemminger smp_mb(); 28311274e5aSStephen Hemminger if (sch->flags & TCQ_F_THROTTLED) 28411274e5aSStephen Hemminger return NULL; 28511274e5aSStephen Hemminger 2861da177e4SLinus Torvalds skb = q->qdisc->dequeue(q->qdisc); 287771018e7SStephen Hemminger if (skb) { 2885f86173bSJussi Kivilinna const struct netem_skb_cb *cb = netem_skb_cb(skb); 2893bebcda2SPatrick McHardy psched_time_t now = psched_get_time(); 2900f9f32acSStephen Hemminger 2910f9f32acSStephen Hemminger /* if more time remaining? 
*/ 292104e0878SPatrick McHardy if (cb->time_to_send <= now) { 293771018e7SStephen Hemminger pr_debug("netem_dequeue: return skb=%p\n", skb); 2941da177e4SLinus Torvalds sch->q.qlen--; 2950f9f32acSStephen Hemminger return skb; 29611274e5aSStephen Hemminger } 29707aaa115SStephen Hemminger 29811274e5aSStephen Hemminger if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) { 299e488eafcSPatrick McHardy qdisc_tree_decrease_qlen(q->qdisc, 1); 30007aaa115SStephen Hemminger sch->qstats.drops++; 30111274e5aSStephen Hemminger printk(KERN_ERR "netem: %s could not requeue\n", 30207aaa115SStephen Hemminger q->qdisc->ops->id); 303771018e7SStephen Hemminger } 30411274e5aSStephen Hemminger 30511274e5aSStephen Hemminger qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); 3060f9f32acSStephen Hemminger } 3070f9f32acSStephen Hemminger 3080f9f32acSStephen Hemminger return NULL; 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch) 3121da177e4SLinus Torvalds { 3131da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds qdisc_reset(q->qdisc); 3161da177e4SLinus Torvalds sch->q.qlen = 0; 31759cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 3181da177e4SLinus Torvalds } 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds /* 3211da177e4SLinus Torvalds * Distribution data is a variable size payload containing 3221da177e4SLinus Torvalds * signed 16 bit values. 3231da177e4SLinus Torvalds */ 3241e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3271e90474cSPatrick McHardy unsigned long n = nla_len(attr)/sizeof(__s16); 3281e90474cSPatrick McHardy const __s16 *data = nla_data(attr); 3297698b4fcSDavid S. 
Miller spinlock_t *root_lock; 3301da177e4SLinus Torvalds struct disttable *d; 3311da177e4SLinus Torvalds int i; 3321da177e4SLinus Torvalds 3331da177e4SLinus Torvalds if (n > 65536) 3341da177e4SLinus Torvalds return -EINVAL; 3351da177e4SLinus Torvalds 3361da177e4SLinus Torvalds d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 3371da177e4SLinus Torvalds if (!d) 3381da177e4SLinus Torvalds return -ENOMEM; 3391da177e4SLinus Torvalds 3401da177e4SLinus Torvalds d->size = n; 3411da177e4SLinus Torvalds for (i = 0; i < n; i++) 3421da177e4SLinus Torvalds d->table[i] = data[i]; 3431da177e4SLinus Torvalds 344102396aeSJarek Poplawski root_lock = qdisc_root_sleeping_lock(sch); 3457698b4fcSDavid S. Miller 3467698b4fcSDavid S. Miller spin_lock_bh(root_lock); 3471da177e4SLinus Torvalds d = xchg(&q->delay_dist, d); 3487698b4fcSDavid S. Miller spin_unlock_bh(root_lock); 3491da177e4SLinus Torvalds 3501da177e4SLinus Torvalds kfree(d); 3511da177e4SLinus Torvalds return 0; 3521da177e4SLinus Torvalds } 3531da177e4SLinus Torvalds 3541e90474cSPatrick McHardy static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) 3551da177e4SLinus Torvalds { 3561da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3571e90474cSPatrick McHardy const struct tc_netem_corr *c = nla_data(attr); 3581da177e4SLinus Torvalds 3591da177e4SLinus Torvalds init_crandom(&q->delay_cor, c->delay_corr); 3601da177e4SLinus Torvalds init_crandom(&q->loss_cor, c->loss_corr); 3611da177e4SLinus Torvalds init_crandom(&q->dup_cor, c->dup_corr); 3621da177e4SLinus Torvalds return 0; 3631da177e4SLinus Torvalds } 3641da177e4SLinus Torvalds 3651e90474cSPatrick McHardy static int get_reorder(struct Qdisc *sch, const struct nlattr *attr) 3660dca51d3SStephen Hemminger { 3670dca51d3SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 3681e90474cSPatrick McHardy const struct tc_netem_reorder *r = nla_data(attr); 3690dca51d3SStephen Hemminger 3700dca51d3SStephen Hemminger q->reorder = 
r->probability; 3710dca51d3SStephen Hemminger init_crandom(&q->reorder_cor, r->correlation); 3720dca51d3SStephen Hemminger return 0; 3730dca51d3SStephen Hemminger } 3740dca51d3SStephen Hemminger 3751e90474cSPatrick McHardy static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr) 376c865e5d9SStephen Hemminger { 377c865e5d9SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 3781e90474cSPatrick McHardy const struct tc_netem_corrupt *r = nla_data(attr); 379c865e5d9SStephen Hemminger 380c865e5d9SStephen Hemminger q->corrupt = r->probability; 381c865e5d9SStephen Hemminger init_crandom(&q->corrupt_cor, r->correlation); 382c865e5d9SStephen Hemminger return 0; 383c865e5d9SStephen Hemminger } 384c865e5d9SStephen Hemminger 38527a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 38627a3421eSPatrick McHardy [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 38727a3421eSPatrick McHardy [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 38827a3421eSPatrick McHardy [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 38927a3421eSPatrick McHardy }; 39027a3421eSPatrick McHardy 3912c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 3922c10b32bSThomas Graf const struct nla_policy *policy, int len) 3932c10b32bSThomas Graf { 3942c10b32bSThomas Graf int nested_len = nla_len(nla) - NLA_ALIGN(len); 3952c10b32bSThomas Graf 3962c10b32bSThomas Graf if (nested_len < 0) 3972c10b32bSThomas Graf return -EINVAL; 3982c10b32bSThomas Graf if (nested_len >= nla_attr_size(0)) 3992c10b32bSThomas Graf return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 4002c10b32bSThomas Graf nested_len, policy); 4012c10b32bSThomas Graf memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 4022c10b32bSThomas Graf return 0; 4032c10b32bSThomas Graf } 4042c10b32bSThomas Graf 405c865e5d9SStephen Hemminger /* Parse netlink message to set options */ 4061e90474cSPatrick 
McHardy static int netem_change(struct Qdisc *sch, struct nlattr *opt) 4071da177e4SLinus Torvalds { 4081da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 409b03f4672SPatrick McHardy struct nlattr *tb[TCA_NETEM_MAX + 1]; 4101da177e4SLinus Torvalds struct tc_netem_qopt *qopt; 4111da177e4SLinus Torvalds int ret; 4121da177e4SLinus Torvalds 413b03f4672SPatrick McHardy if (opt == NULL) 4141da177e4SLinus Torvalds return -EINVAL; 4151da177e4SLinus Torvalds 4162c10b32bSThomas Graf qopt = nla_data(opt); 4172c10b32bSThomas Graf ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); 418b03f4672SPatrick McHardy if (ret < 0) 419b03f4672SPatrick McHardy return ret; 420b03f4672SPatrick McHardy 421fb0305ceSPatrick McHardy ret = fifo_set_limit(q->qdisc, qopt->limit); 4221da177e4SLinus Torvalds if (ret) { 4231da177e4SLinus Torvalds pr_debug("netem: can't set fifo limit\n"); 4241da177e4SLinus Torvalds return ret; 4251da177e4SLinus Torvalds } 4261da177e4SLinus Torvalds 4271da177e4SLinus Torvalds q->latency = qopt->latency; 4281da177e4SLinus Torvalds q->jitter = qopt->jitter; 4291da177e4SLinus Torvalds q->limit = qopt->limit; 4301da177e4SLinus Torvalds q->gap = qopt->gap; 4310dca51d3SStephen Hemminger q->counter = 0; 4321da177e4SLinus Torvalds q->loss = qopt->loss; 4331da177e4SLinus Torvalds q->duplicate = qopt->duplicate; 4341da177e4SLinus Torvalds 435bb2f8cc0SStephen Hemminger /* for compatibility with earlier versions. 
436bb2f8cc0SStephen Hemminger * if gap is set, need to assume 100% probability 4370dca51d3SStephen Hemminger */ 438a362e0a7SStephen Hemminger if (q->gap) 4390dca51d3SStephen Hemminger q->reorder = ~0; 4400dca51d3SStephen Hemminger 4411e90474cSPatrick McHardy if (tb[TCA_NETEM_CORR]) { 4421e90474cSPatrick McHardy ret = get_correlation(sch, tb[TCA_NETEM_CORR]); 4431da177e4SLinus Torvalds if (ret) 4441da177e4SLinus Torvalds return ret; 4451da177e4SLinus Torvalds } 4461da177e4SLinus Torvalds 4471e90474cSPatrick McHardy if (tb[TCA_NETEM_DELAY_DIST]) { 4481e90474cSPatrick McHardy ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); 4491da177e4SLinus Torvalds if (ret) 4501da177e4SLinus Torvalds return ret; 4511da177e4SLinus Torvalds } 452c865e5d9SStephen Hemminger 4531e90474cSPatrick McHardy if (tb[TCA_NETEM_REORDER]) { 4541e90474cSPatrick McHardy ret = get_reorder(sch, tb[TCA_NETEM_REORDER]); 4550dca51d3SStephen Hemminger if (ret) 4560dca51d3SStephen Hemminger return ret; 4570dca51d3SStephen Hemminger } 4581da177e4SLinus Torvalds 4591e90474cSPatrick McHardy if (tb[TCA_NETEM_CORRUPT]) { 4601e90474cSPatrick McHardy ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 461c865e5d9SStephen Hemminger if (ret) 462c865e5d9SStephen Hemminger return ret; 463c865e5d9SStephen Hemminger } 4641da177e4SLinus Torvalds 4651da177e4SLinus Torvalds return 0; 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds 468300ce174SStephen Hemminger /* 469300ce174SStephen Hemminger * Special case version of FIFO queue for use by netem. 
470300ce174SStephen Hemminger * It queues in order based on timestamps in skb's 471300ce174SStephen Hemminger */ 472300ce174SStephen Hemminger struct fifo_sched_data { 473300ce174SStephen Hemminger u32 limit; 474075aa573SStephen Hemminger psched_time_t oldest; 475300ce174SStephen Hemminger }; 476300ce174SStephen Hemminger 477300ce174SStephen Hemminger static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 478300ce174SStephen Hemminger { 479300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 480300ce174SStephen Hemminger struct sk_buff_head *list = &sch->q; 4815f86173bSJussi Kivilinna psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; 482300ce174SStephen Hemminger struct sk_buff *skb; 483300ce174SStephen Hemminger 484300ce174SStephen Hemminger if (likely(skb_queue_len(list) < q->limit)) { 485075aa573SStephen Hemminger /* Optimize for add at tail */ 486104e0878SPatrick McHardy if (likely(skb_queue_empty(list) || tnext >= q->oldest)) { 487075aa573SStephen Hemminger q->oldest = tnext; 488075aa573SStephen Hemminger return qdisc_enqueue_tail(nskb, sch); 489075aa573SStephen Hemminger } 490075aa573SStephen Hemminger 491300ce174SStephen Hemminger skb_queue_reverse_walk(list, skb) { 4925f86173bSJussi Kivilinna const struct netem_skb_cb *cb = netem_skb_cb(skb); 493300ce174SStephen Hemminger 494104e0878SPatrick McHardy if (tnext >= cb->time_to_send) 495300ce174SStephen Hemminger break; 496300ce174SStephen Hemminger } 497300ce174SStephen Hemminger 498300ce174SStephen Hemminger __skb_queue_after(list, skb, nskb); 499300ce174SStephen Hemminger 5000abf77e5SJussi Kivilinna sch->qstats.backlog += qdisc_pkt_len(nskb); 5010abf77e5SJussi Kivilinna sch->bstats.bytes += qdisc_pkt_len(nskb); 502300ce174SStephen Hemminger sch->bstats.packets++; 503300ce174SStephen Hemminger 504300ce174SStephen Hemminger return NET_XMIT_SUCCESS; 505300ce174SStephen Hemminger } 506300ce174SStephen Hemminger 507075aa573SStephen Hemminger return qdisc_reshape_fail(nskb, 
sch); 508300ce174SStephen Hemminger } 509300ce174SStephen Hemminger 5101e90474cSPatrick McHardy static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) 511300ce174SStephen Hemminger { 512300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 513300ce174SStephen Hemminger 514300ce174SStephen Hemminger if (opt) { 5151e90474cSPatrick McHardy struct tc_fifo_qopt *ctl = nla_data(opt); 5161e90474cSPatrick McHardy if (nla_len(opt) < sizeof(*ctl)) 517300ce174SStephen Hemminger return -EINVAL; 518300ce174SStephen Hemminger 519300ce174SStephen Hemminger q->limit = ctl->limit; 520300ce174SStephen Hemminger } else 5215ce2d488SDavid S. Miller q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); 522300ce174SStephen Hemminger 523a084980dSPatrick McHardy q->oldest = PSCHED_PASTPERFECT; 524300ce174SStephen Hemminger return 0; 525300ce174SStephen Hemminger } 526300ce174SStephen Hemminger 527300ce174SStephen Hemminger static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) 528300ce174SStephen Hemminger { 529300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 530300ce174SStephen Hemminger struct tc_fifo_qopt opt = { .limit = q->limit }; 531300ce174SStephen Hemminger 5321e90474cSPatrick McHardy NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 533300ce174SStephen Hemminger return skb->len; 534300ce174SStephen Hemminger 5351e90474cSPatrick McHardy nla_put_failure: 536300ce174SStephen Hemminger return -1; 537300ce174SStephen Hemminger } 538300ce174SStephen Hemminger 53920fea08bSEric Dumazet static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { 540300ce174SStephen Hemminger .id = "tfifo", 541300ce174SStephen Hemminger .priv_size = sizeof(struct fifo_sched_data), 542300ce174SStephen Hemminger .enqueue = tfifo_enqueue, 543300ce174SStephen Hemminger .dequeue = qdisc_dequeue_head, 5448e3af978SJarek Poplawski .peek = qdisc_peek_head, 545300ce174SStephen Hemminger .requeue = qdisc_requeue, 546300ce174SStephen Hemminger .drop = qdisc_queue_drop, 
547300ce174SStephen Hemminger .init = tfifo_init, 548300ce174SStephen Hemminger .reset = qdisc_reset_queue, 549300ce174SStephen Hemminger .change = tfifo_init, 550300ce174SStephen Hemminger .dump = tfifo_dump, 551300ce174SStephen Hemminger }; 552300ce174SStephen Hemminger 5531e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt) 5541da177e4SLinus Torvalds { 5551da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5561da177e4SLinus Torvalds int ret; 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds if (!opt) 5591da177e4SLinus Torvalds return -EINVAL; 5601da177e4SLinus Torvalds 56159cb5c67SPatrick McHardy qdisc_watchdog_init(&q->watchdog, sch); 5621da177e4SLinus Torvalds 5635ce2d488SDavid S. Miller q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 564bb949fbdSDavid S. Miller &tfifo_qdisc_ops, 5659f9afec4SPatrick McHardy TC_H_MAKE(sch->handle, 1)); 5661da177e4SLinus Torvalds if (!q->qdisc) { 5671da177e4SLinus Torvalds pr_debug("netem: qdisc create failed\n"); 5681da177e4SLinus Torvalds return -ENOMEM; 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5711da177e4SLinus Torvalds ret = netem_change(sch, opt); 5721da177e4SLinus Torvalds if (ret) { 5731da177e4SLinus Torvalds pr_debug("netem: change failed\n"); 5741da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 5751da177e4SLinus Torvalds } 5761da177e4SLinus Torvalds return ret; 5771da177e4SLinus Torvalds } 5781da177e4SLinus Torvalds 5791da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch) 5801da177e4SLinus Torvalds { 5811da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5821da177e4SLinus Torvalds 58359cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 5841da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 5851da177e4SLinus Torvalds kfree(q->delay_dist); 5861da177e4SLinus Torvalds } 5871da177e4SLinus Torvalds 5881da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 5891da177e4SLinus 
Torvalds { 5901da177e4SLinus Torvalds const struct netem_sched_data *q = qdisc_priv(sch); 59127a884dcSArnaldo Carvalho de Melo unsigned char *b = skb_tail_pointer(skb); 5921e90474cSPatrick McHardy struct nlattr *nla = (struct nlattr *) b; 5931da177e4SLinus Torvalds struct tc_netem_qopt qopt; 5941da177e4SLinus Torvalds struct tc_netem_corr cor; 5950dca51d3SStephen Hemminger struct tc_netem_reorder reorder; 596c865e5d9SStephen Hemminger struct tc_netem_corrupt corrupt; 5971da177e4SLinus Torvalds 5981da177e4SLinus Torvalds qopt.latency = q->latency; 5991da177e4SLinus Torvalds qopt.jitter = q->jitter; 6001da177e4SLinus Torvalds qopt.limit = q->limit; 6011da177e4SLinus Torvalds qopt.loss = q->loss; 6021da177e4SLinus Torvalds qopt.gap = q->gap; 6031da177e4SLinus Torvalds qopt.duplicate = q->duplicate; 6041e90474cSPatrick McHardy NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 6051da177e4SLinus Torvalds 6061da177e4SLinus Torvalds cor.delay_corr = q->delay_cor.rho; 6071da177e4SLinus Torvalds cor.loss_corr = q->loss_cor.rho; 6081da177e4SLinus Torvalds cor.dup_corr = q->dup_cor.rho; 6091e90474cSPatrick McHardy NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 6100dca51d3SStephen Hemminger 6110dca51d3SStephen Hemminger reorder.probability = q->reorder; 6120dca51d3SStephen Hemminger reorder.correlation = q->reorder_cor.rho; 6131e90474cSPatrick McHardy NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); 6140dca51d3SStephen Hemminger 615c865e5d9SStephen Hemminger corrupt.probability = q->corrupt; 616c865e5d9SStephen Hemminger corrupt.correlation = q->corrupt_cor.rho; 6171e90474cSPatrick McHardy NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 618c865e5d9SStephen Hemminger 6191e90474cSPatrick McHardy nla->nla_len = skb_tail_pointer(skb) - b; 6201da177e4SLinus Torvalds 6211da177e4SLinus Torvalds return skb->len; 6221da177e4SLinus Torvalds 6231e90474cSPatrick McHardy nla_put_failure: 624dc5fc579SArnaldo Carvalho de Melo nlmsg_trim(skb, b); 6251da177e4SLinus 
Torvalds return -1; 6261da177e4SLinus Torvalds } 6271da177e4SLinus Torvalds 6281da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 6291da177e4SLinus Torvalds struct sk_buff *skb, struct tcmsg *tcm) 6301da177e4SLinus Torvalds { 6311da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds if (cl != 1) /* only one class */ 6341da177e4SLinus Torvalds return -ENOENT; 6351da177e4SLinus Torvalds 6361da177e4SLinus Torvalds tcm->tcm_handle |= TC_H_MIN(1); 6371da177e4SLinus Torvalds tcm->tcm_info = q->qdisc->handle; 6381da177e4SLinus Torvalds 6391da177e4SLinus Torvalds return 0; 6401da177e4SLinus Torvalds } 6411da177e4SLinus Torvalds 6421da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 6431da177e4SLinus Torvalds struct Qdisc **old) 6441da177e4SLinus Torvalds { 6451da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6461da177e4SLinus Torvalds 6471da177e4SLinus Torvalds if (new == NULL) 6481da177e4SLinus Torvalds new = &noop_qdisc; 6491da177e4SLinus Torvalds 6501da177e4SLinus Torvalds sch_tree_lock(sch); 6511da177e4SLinus Torvalds *old = xchg(&q->qdisc, new); 6525e50da01SPatrick McHardy qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 6531da177e4SLinus Torvalds qdisc_reset(*old); 6541da177e4SLinus Torvalds sch_tree_unlock(sch); 6551da177e4SLinus Torvalds 6561da177e4SLinus Torvalds return 0; 6571da177e4SLinus Torvalds } 6581da177e4SLinus Torvalds 6591da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 6601da177e4SLinus Torvalds { 6611da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6621da177e4SLinus Torvalds return q->qdisc; 6631da177e4SLinus Torvalds } 6641da177e4SLinus Torvalds 6651da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid) 6661da177e4SLinus Torvalds { 6671da177e4SLinus Torvalds return 1; 
6681da177e4SLinus Torvalds } 6691da177e4SLinus Torvalds 6701da177e4SLinus Torvalds static void netem_put(struct Qdisc *sch, unsigned long arg) 6711da177e4SLinus Torvalds { 6721da177e4SLinus Torvalds } 6731da177e4SLinus Torvalds 6741da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 6751e90474cSPatrick McHardy struct nlattr **tca, unsigned long *arg) 6761da177e4SLinus Torvalds { 6771da177e4SLinus Torvalds return -ENOSYS; 6781da177e4SLinus Torvalds } 6791da177e4SLinus Torvalds 6801da177e4SLinus Torvalds static int netem_delete(struct Qdisc *sch, unsigned long arg) 6811da177e4SLinus Torvalds { 6821da177e4SLinus Torvalds return -ENOSYS; 6831da177e4SLinus Torvalds } 6841da177e4SLinus Torvalds 6851da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 6861da177e4SLinus Torvalds { 6871da177e4SLinus Torvalds if (!walker->stop) { 6881da177e4SLinus Torvalds if (walker->count >= walker->skip) 6891da177e4SLinus Torvalds if (walker->fn(sch, 1, walker) < 0) { 6901da177e4SLinus Torvalds walker->stop = 1; 6911da177e4SLinus Torvalds return; 6921da177e4SLinus Torvalds } 6931da177e4SLinus Torvalds walker->count++; 6941da177e4SLinus Torvalds } 6951da177e4SLinus Torvalds } 6961da177e4SLinus Torvalds 6971da177e4SLinus Torvalds static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) 6981da177e4SLinus Torvalds { 6991da177e4SLinus Torvalds return NULL; 7001da177e4SLinus Torvalds } 7011da177e4SLinus Torvalds 70220fea08bSEric Dumazet static const struct Qdisc_class_ops netem_class_ops = { 7031da177e4SLinus Torvalds .graft = netem_graft, 7041da177e4SLinus Torvalds .leaf = netem_leaf, 7051da177e4SLinus Torvalds .get = netem_get, 7061da177e4SLinus Torvalds .put = netem_put, 7071da177e4SLinus Torvalds .change = netem_change_class, 7081da177e4SLinus Torvalds .delete = netem_delete, 7091da177e4SLinus Torvalds .walk = netem_walk, 7101da177e4SLinus Torvalds .tcf_chain = netem_find_tcf, 
7111da177e4SLinus Torvalds .dump = netem_dump_class, 7121da177e4SLinus Torvalds }; 7131da177e4SLinus Torvalds 71420fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 7151da177e4SLinus Torvalds .id = "netem", 7161da177e4SLinus Torvalds .cl_ops = &netem_class_ops, 7171da177e4SLinus Torvalds .priv_size = sizeof(struct netem_sched_data), 7181da177e4SLinus Torvalds .enqueue = netem_enqueue, 7191da177e4SLinus Torvalds .dequeue = netem_dequeue, 7201da177e4SLinus Torvalds .requeue = netem_requeue, 7211da177e4SLinus Torvalds .drop = netem_drop, 7221da177e4SLinus Torvalds .init = netem_init, 7231da177e4SLinus Torvalds .reset = netem_reset, 7241da177e4SLinus Torvalds .destroy = netem_destroy, 7251da177e4SLinus Torvalds .change = netem_change, 7261da177e4SLinus Torvalds .dump = netem_dump, 7271da177e4SLinus Torvalds .owner = THIS_MODULE, 7281da177e4SLinus Torvalds }; 7291da177e4SLinus Torvalds 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds static int __init netem_module_init(void) 7321da177e4SLinus Torvalds { 733eb229c4cSStephen Hemminger pr_info("netem: version " VERSION "\n"); 7341da177e4SLinus Torvalds return register_qdisc(&netem_qdisc_ops); 7351da177e4SLinus Torvalds } 7361da177e4SLinus Torvalds static void __exit netem_module_exit(void) 7371da177e4SLinus Torvalds { 7381da177e4SLinus Torvalds unregister_qdisc(&netem_qdisc_ops); 7391da177e4SLinus Torvalds } 7401da177e4SLinus Torvalds module_init(netem_module_init) 7411da177e4SLinus Torvalds module_exit(netem_module_exit) 7421da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 743