11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * net/sched/sch_netem.c Network emulator 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 51da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 61da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 7798b6b19SStephen Hemminger * 2 of the License. 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Many of the algorithms and ideas for this came from 101da177e4SLinus Torvalds * NIST Net which is not copyrighted. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Authors: Stephen Hemminger <shemminger@osdl.org> 131da177e4SLinus Torvalds * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/module.h> 171da177e4SLinus Torvalds #include <linux/types.h> 181da177e4SLinus Torvalds #include <linux/kernel.h> 191da177e4SLinus Torvalds #include <linux/errno.h> 201da177e4SLinus Torvalds #include <linux/skbuff.h> 211da177e4SLinus Torvalds #include <linux/rtnetlink.h> 221da177e4SLinus Torvalds 23dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h> 241da177e4SLinus Torvalds #include <net/pkt_sched.h> 251da177e4SLinus Torvalds 26c865e5d9SStephen Hemminger #define VERSION "1.2" 27eb229c4cSStephen Hemminger 281da177e4SLinus Torvalds /* Network Emulation Queuing algorithm. 291da177e4SLinus Torvalds ==================================== 301da177e4SLinus Torvalds 311da177e4SLinus Torvalds Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 321da177e4SLinus Torvalds Network Emulation Tool 331da177e4SLinus Torvalds [2] Luigi Rizzo, DummyNet for FreeBSD 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds ---------------------------------------------------------------- 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds This started out as a simple way to delay outgoing packets to 381da177e4SLinus Torvalds test TCP but has grown to include most of the functionality 391da177e4SLinus Torvalds of a full blown network emulator like NISTnet. It can delay 401da177e4SLinus Torvalds packets and add random jitter (and correlation). The random 411da177e4SLinus Torvalds distribution can be loaded from a table as well to provide 421da177e4SLinus Torvalds normal, Pareto, or experimental curves. Packet loss, 431da177e4SLinus Torvalds duplication, and reordering can also be emulated. 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds This qdisc does not do classification that can be handled in 461da177e4SLinus Torvalds layering other disciplines. It does not need to do bandwidth 471da177e4SLinus Torvalds control either since that can be handled by using token 481da177e4SLinus Torvalds bucket or other rate control. 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds The simulator is limited by the Linux timer resolution 511da177e4SLinus Torvalds and will create packet bursts on the HZ boundary (1ms). 521da177e4SLinus Torvalds */ 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds struct netem_sched_data { 551da177e4SLinus Torvalds struct Qdisc *qdisc; 5659cb5c67SPatrick McHardy struct qdisc_watchdog watchdog; 571da177e4SLinus Torvalds 58b407621cSStephen Hemminger psched_tdiff_t latency; 59b407621cSStephen Hemminger psched_tdiff_t jitter; 60b407621cSStephen Hemminger 611da177e4SLinus Torvalds u32 loss; 621da177e4SLinus Torvalds u32 limit; 631da177e4SLinus Torvalds u32 counter; 641da177e4SLinus Torvalds u32 gap; 651da177e4SLinus Torvalds u32 duplicate; 660dca51d3SStephen Hemminger u32 reorder; 67c865e5d9SStephen Hemminger u32 corrupt; 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds struct crndstate { 70b407621cSStephen Hemminger u32 last; 71b407621cSStephen Hemminger u32 rho; 72c865e5d9SStephen Hemminger } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds struct disttable { 751da177e4SLinus Torvalds u32 size; 761da177e4SLinus Torvalds s16 table[0]; 771da177e4SLinus Torvalds } *delay_dist; 781da177e4SLinus Torvalds }; 791da177e4SLinus Torvalds 801da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */ 811da177e4SLinus Torvalds struct netem_skb_cb { 821da177e4SLinus Torvalds psched_time_t time_to_send; 831da177e4SLinus Torvalds }; 841da177e4SLinus Torvalds 851da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator 861da177e4SLinus Torvalds * Use entropy source for initial seed. 871da177e4SLinus Torvalds */ 881da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 891da177e4SLinus Torvalds { 901da177e4SLinus Torvalds state->rho = rho; 911da177e4SLinus Torvalds state->last = net_random(); 921da177e4SLinus Torvalds } 931da177e4SLinus Torvalds 941da177e4SLinus Torvalds /* get_crandom - correlated random number generator 951da177e4SLinus Torvalds * Next number depends on last value. 961da177e4SLinus Torvalds * rho is scaled to avoid floating point. 971da177e4SLinus Torvalds */ 98b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state) 991da177e4SLinus Torvalds { 1001da177e4SLinus Torvalds u64 value, rho; 1011da177e4SLinus Torvalds unsigned long answer; 1021da177e4SLinus Torvalds 103bb2f8cc0SStephen Hemminger if (state->rho == 0) /* no correlation */ 1041da177e4SLinus Torvalds return net_random(); 1051da177e4SLinus Torvalds 1061da177e4SLinus Torvalds value = net_random(); 1071da177e4SLinus Torvalds rho = (u64)state->rho + 1; 1081da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 1091da177e4SLinus Torvalds state->last = answer; 1101da177e4SLinus Torvalds return answer; 1111da177e4SLinus Torvalds } 1121da177e4SLinus Torvalds 1131da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and 1141da177e4SLinus Torvalds * std deviation sigma. Uses table lookup to approximate the desired 1151da177e4SLinus Torvalds * distribution, and a uniformly-distributed pseudo-random source. 1161da177e4SLinus Torvalds */ 117b407621cSStephen Hemminger static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 118b407621cSStephen Hemminger struct crndstate *state, 119b407621cSStephen Hemminger const struct disttable *dist) 1201da177e4SLinus Torvalds { 121b407621cSStephen Hemminger psched_tdiff_t x; 122b407621cSStephen Hemminger long t; 123b407621cSStephen Hemminger u32 rnd; 1241da177e4SLinus Torvalds 1251da177e4SLinus Torvalds if (sigma == 0) 1261da177e4SLinus Torvalds return mu; 1271da177e4SLinus Torvalds 1281da177e4SLinus Torvalds rnd = get_crandom(state); 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds /* default uniform distribution */ 1311da177e4SLinus Torvalds if (dist == NULL) 1321da177e4SLinus Torvalds return (rnd % (2*sigma)) - sigma + mu; 1331da177e4SLinus Torvalds 1341da177e4SLinus Torvalds t = dist->table[rnd % dist->size]; 1351da177e4SLinus Torvalds x = (sigma % NETEM_DIST_SCALE) * t; 1361da177e4SLinus Torvalds if (x >= 0) 1371da177e4SLinus Torvalds x += NETEM_DIST_SCALE/2; 1381da177e4SLinus Torvalds else 1391da177e4SLinus Torvalds x -= NETEM_DIST_SCALE/2; 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 1421da177e4SLinus Torvalds } 1431da177e4SLinus Torvalds 1440afb51e7SStephen Hemminger /* 1450afb51e7SStephen Hemminger * Insert one skb into qdisc. 1460afb51e7SStephen Hemminger * Note: parent depends on return value to account for queue length. 1470afb51e7SStephen Hemminger * NET_XMIT_DROP: queue length didn't change. 1480afb51e7SStephen Hemminger * NET_XMIT_SUCCESS: one skb was queued. 1490afb51e7SStephen Hemminger */ 1501da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 1511da177e4SLinus Torvalds { 1521da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 15389e1df74SGuillaume Chazarain /* We don't fill cb now as skb_unshare() may invalidate it */ 15489e1df74SGuillaume Chazarain struct netem_skb_cb *cb; 1550afb51e7SStephen Hemminger struct sk_buff *skb2; 1561da177e4SLinus Torvalds int ret; 1570afb51e7SStephen Hemminger int count = 1; 1581da177e4SLinus Torvalds 159771018e7SStephen Hemminger pr_debug("netem_enqueue skb=%p\n", skb); 1601da177e4SLinus Torvalds 1610afb51e7SStephen Hemminger /* Random duplication */ 1620afb51e7SStephen Hemminger if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 1630afb51e7SStephen Hemminger ++count; 1640afb51e7SStephen Hemminger 1651da177e4SLinus Torvalds /* Random packet drop 0 => none, ~0 => all */ 1660afb51e7SStephen Hemminger if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 1670afb51e7SStephen Hemminger --count; 1680afb51e7SStephen Hemminger 1690afb51e7SStephen Hemminger if (count == 0) { 1701da177e4SLinus Torvalds sch->qstats.drops++; 1711da177e4SLinus Torvalds kfree_skb(skb); 17289bbb0a3SStephen Hemminger return NET_XMIT_BYPASS; 1731da177e4SLinus Torvalds } 1741da177e4SLinus Torvalds 1754e8a5201SDavid S. Miller skb_orphan(skb); 1764e8a5201SDavid S. Miller 1770afb51e7SStephen Hemminger /* 1780afb51e7SStephen Hemminger * If we need to duplicate packet, then re-insert at top of the 1790afb51e7SStephen Hemminger * qdisc tree, since parent queuer expects that only one 1800afb51e7SStephen Hemminger * skb will be queued. 181d5d75cd6SStephen Hemminger */ 1820afb51e7SStephen Hemminger if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 1830afb51e7SStephen Hemminger struct Qdisc *rootq = sch->dev->qdisc; 1840afb51e7SStephen Hemminger u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ 1850afb51e7SStephen Hemminger q->duplicate = 0; 186d5d75cd6SStephen Hemminger 1870afb51e7SStephen Hemminger rootq->enqueue(skb2, rootq); 1880afb51e7SStephen Hemminger q->duplicate = dupsave; 1891da177e4SLinus Torvalds } 1901da177e4SLinus Torvalds 191c865e5d9SStephen Hemminger /* 192c865e5d9SStephen Hemminger * Randomized packet corruption. 193c865e5d9SStephen Hemminger * Make copy if needed since we are modifying 194c865e5d9SStephen Hemminger * If packet is going to be hardware checksummed, then 195c865e5d9SStephen Hemminger * do it now in software before we mangle it. 196c865e5d9SStephen Hemminger */ 197c865e5d9SStephen Hemminger if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 198c865e5d9SStephen Hemminger if (!(skb = skb_unshare(skb, GFP_ATOMIC)) 19984fa7933SPatrick McHardy || (skb->ip_summed == CHECKSUM_PARTIAL 20084fa7933SPatrick McHardy && skb_checksum_help(skb))) { 201c865e5d9SStephen Hemminger sch->qstats.drops++; 202c865e5d9SStephen Hemminger return NET_XMIT_DROP; 203c865e5d9SStephen Hemminger } 204c865e5d9SStephen Hemminger 205c865e5d9SStephen Hemminger skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); 206c865e5d9SStephen Hemminger } 207c865e5d9SStephen Hemminger 20889e1df74SGuillaume Chazarain cb = (struct netem_skb_cb *)skb->cb; 2090dca51d3SStephen Hemminger if (q->gap == 0 /* not doing reordering */ 2100dca51d3SStephen Hemminger || q->counter < q->gap /* inside last reordering gap */ 2110dca51d3SStephen Hemminger || q->reorder < get_crandom(&q->reorder_cor)) { 2120f9f32acSStephen Hemminger psched_time_t now; 21307aaa115SStephen Hemminger psched_tdiff_t delay; 21407aaa115SStephen Hemminger 21507aaa115SStephen Hemminger delay = tabledist(q->latency, q->jitter, 21607aaa115SStephen Hemminger &q->delay_cor, q->delay_dist); 21707aaa115SStephen Hemminger 2183bebcda2SPatrick McHardy now = psched_get_time(); 2197c59e25fSPatrick McHardy cb->time_to_send = now + delay; 2201da177e4SLinus Torvalds ++q->counter; 2211da177e4SLinus Torvalds ret = q->qdisc->enqueue(skb, q->qdisc); 2221da177e4SLinus Torvalds } else { 2230dca51d3SStephen Hemminger /* 2240dca51d3SStephen Hemminger * Do re-ordering by putting one out of N packets at the front 2250dca51d3SStephen Hemminger * of the queue. 2260dca51d3SStephen Hemminger */ 2273bebcda2SPatrick McHardy cb->time_to_send = psched_get_time(); 2280dca51d3SStephen Hemminger q->counter = 0; 2290f9f32acSStephen Hemminger ret = q->qdisc->ops->requeue(skb, q->qdisc); 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds if (likely(ret == NET_XMIT_SUCCESS)) { 2331da177e4SLinus Torvalds sch->q.qlen++; 2341da177e4SLinus Torvalds sch->bstats.bytes += skb->len; 2351da177e4SLinus Torvalds sch->bstats.packets++; 2361da177e4SLinus Torvalds } else 2371da177e4SLinus Torvalds sch->qstats.drops++; 2381da177e4SLinus Torvalds 239d5d75cd6SStephen Hemminger pr_debug("netem: enqueue ret %d\n", ret); 2401da177e4SLinus Torvalds return ret; 2411da177e4SLinus Torvalds } 2421da177e4SLinus Torvalds 2431da177e4SLinus Torvalds /* Requeue packets but don't change time stamp */ 2441da177e4SLinus Torvalds static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) 2451da177e4SLinus Torvalds { 2461da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2471da177e4SLinus Torvalds int ret; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { 2501da177e4SLinus Torvalds sch->q.qlen++; 2511da177e4SLinus Torvalds sch->qstats.requeues++; 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds return ret; 2551da177e4SLinus Torvalds } 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc* sch) 2581da177e4SLinus Torvalds { 2591da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2606d037a26SPatrick McHardy unsigned int len = 0; 2611da177e4SLinus Torvalds 2626d037a26SPatrick McHardy if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { 2631da177e4SLinus Torvalds sch->q.qlen--; 2641da177e4SLinus Torvalds sch->qstats.drops++; 2651da177e4SLinus Torvalds } 2661da177e4SLinus Torvalds return len; 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch) 2701da177e4SLinus Torvalds { 2711da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 2721da177e4SLinus Torvalds struct sk_buff *skb; 2731da177e4SLinus Torvalds 27411274e5aSStephen Hemminger smp_mb(); 27511274e5aSStephen Hemminger if (sch->flags & TCQ_F_THROTTLED) 27611274e5aSStephen Hemminger return NULL; 27711274e5aSStephen Hemminger 2781da177e4SLinus Torvalds skb = q->qdisc->dequeue(q->qdisc); 279771018e7SStephen Hemminger if (skb) { 2800f9f32acSStephen Hemminger const struct netem_skb_cb *cb 2810f9f32acSStephen Hemminger = (const struct netem_skb_cb *)skb->cb; 2823bebcda2SPatrick McHardy psched_time_t now = psched_get_time(); 2830f9f32acSStephen Hemminger 2840f9f32acSStephen Hemminger /* if more time remaining? */ 285104e0878SPatrick McHardy if (cb->time_to_send <= now) { 286771018e7SStephen Hemminger pr_debug("netem_dequeue: return skb=%p\n", skb); 2871da177e4SLinus Torvalds sch->q.qlen--; 2880f9f32acSStephen Hemminger return skb; 28911274e5aSStephen Hemminger } 29007aaa115SStephen Hemminger 29111274e5aSStephen Hemminger if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) { 292e488eafcSPatrick McHardy qdisc_tree_decrease_qlen(q->qdisc, 1); 29307aaa115SStephen Hemminger sch->qstats.drops++; 29411274e5aSStephen Hemminger printk(KERN_ERR "netem: %s could not requeue\n", 29507aaa115SStephen Hemminger q->qdisc->ops->id); 296771018e7SStephen Hemminger } 29711274e5aSStephen Hemminger 29811274e5aSStephen Hemminger qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); 2990f9f32acSStephen Hemminger } 3000f9f32acSStephen Hemminger 3010f9f32acSStephen Hemminger return NULL; 3021da177e4SLinus Torvalds } 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch) 3051da177e4SLinus Torvalds { 3061da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3071da177e4SLinus Torvalds 3081da177e4SLinus Torvalds qdisc_reset(q->qdisc); 3091da177e4SLinus Torvalds sch->q.qlen = 0; 31059cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds 313300ce174SStephen Hemminger /* Pass size change message down to embedded FIFO */ 3141da177e4SLinus Torvalds static int set_fifo_limit(struct Qdisc *q, int limit) 3151da177e4SLinus Torvalds { 3161e90474cSPatrick McHardy struct nlattr *nla; 3171da177e4SLinus Torvalds int ret = -ENOMEM; 3181da177e4SLinus Torvalds 319300ce174SStephen Hemminger /* Hack to avoid sending change message to non-FIFO */ 320300ce174SStephen Hemminger if (strncmp(q->ops->id + 1, "fifo", 4) != 0) 321300ce174SStephen Hemminger return 0; 322300ce174SStephen Hemminger 3231e90474cSPatrick McHardy nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 3241e90474cSPatrick McHardy if (nla) { 3251e90474cSPatrick McHardy nla->nla_type = RTM_NEWQDISC; 3261e90474cSPatrick McHardy nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); 3271e90474cSPatrick McHardy ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; 3281da177e4SLinus Torvalds 3291e90474cSPatrick McHardy ret = q->ops->change(q, nla); 3301e90474cSPatrick McHardy kfree(nla); 3311da177e4SLinus Torvalds } 3321da177e4SLinus Torvalds return ret; 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds /* 3361da177e4SLinus Torvalds * Distribution data is a variable size payload containing 3371da177e4SLinus Torvalds * signed 16 bit values. 3381da177e4SLinus Torvalds */ 3391e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 3401da177e4SLinus Torvalds { 3411da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3421e90474cSPatrick McHardy unsigned long n = nla_len(attr)/sizeof(__s16); 3431e90474cSPatrick McHardy const __s16 *data = nla_data(attr); 3441da177e4SLinus Torvalds struct disttable *d; 3451da177e4SLinus Torvalds int i; 3461da177e4SLinus Torvalds 3471da177e4SLinus Torvalds if (n > 65536) 3481da177e4SLinus Torvalds return -EINVAL; 3491da177e4SLinus Torvalds 3501da177e4SLinus Torvalds d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 3511da177e4SLinus Torvalds if (!d) 3521da177e4SLinus Torvalds return -ENOMEM; 3531da177e4SLinus Torvalds 3541da177e4SLinus Torvalds d->size = n; 3551da177e4SLinus Torvalds for (i = 0; i < n; i++) 3561da177e4SLinus Torvalds d->table[i] = data[i]; 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds spin_lock_bh(&sch->dev->queue_lock); 3591da177e4SLinus Torvalds d = xchg(&q->delay_dist, d); 3601da177e4SLinus Torvalds spin_unlock_bh(&sch->dev->queue_lock); 3611da177e4SLinus Torvalds 3621da177e4SLinus Torvalds kfree(d); 3631da177e4SLinus Torvalds return 0; 3641da177e4SLinus Torvalds } 3651da177e4SLinus Torvalds 3661e90474cSPatrick McHardy static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) 3671da177e4SLinus Torvalds { 3681da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 3691e90474cSPatrick McHardy const struct tc_netem_corr *c = nla_data(attr); 3701da177e4SLinus Torvalds 3711da177e4SLinus Torvalds init_crandom(&q->delay_cor, c->delay_corr); 3721da177e4SLinus Torvalds init_crandom(&q->loss_cor, c->loss_corr); 3731da177e4SLinus Torvalds init_crandom(&q->dup_cor, c->dup_corr); 3741da177e4SLinus Torvalds return 0; 3751da177e4SLinus Torvalds } 3761da177e4SLinus Torvalds 3771e90474cSPatrick McHardy static int get_reorder(struct Qdisc *sch, const struct nlattr *attr) 3780dca51d3SStephen Hemminger { 3790dca51d3SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 3801e90474cSPatrick McHardy const struct tc_netem_reorder *r = nla_data(attr); 3810dca51d3SStephen Hemminger 3820dca51d3SStephen Hemminger q->reorder = r->probability; 3830dca51d3SStephen Hemminger init_crandom(&q->reorder_cor, r->correlation); 3840dca51d3SStephen Hemminger return 0; 3850dca51d3SStephen Hemminger } 3860dca51d3SStephen Hemminger 3871e90474cSPatrick McHardy static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr) 388c865e5d9SStephen Hemminger { 389c865e5d9SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 3901e90474cSPatrick McHardy const struct tc_netem_corrupt *r = nla_data(attr); 391c865e5d9SStephen Hemminger 392c865e5d9SStephen Hemminger q->corrupt = r->probability; 393c865e5d9SStephen Hemminger init_crandom(&q->corrupt_cor, r->correlation); 394c865e5d9SStephen Hemminger return 0; 395c865e5d9SStephen Hemminger } 396c865e5d9SStephen Hemminger 39727a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 39827a3421eSPatrick McHardy [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 39927a3421eSPatrick McHardy [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 40027a3421eSPatrick McHardy [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 40127a3421eSPatrick McHardy }; 40227a3421eSPatrick McHardy 403c865e5d9SStephen Hemminger /* Parse netlink message to set options */ 4041e90474cSPatrick McHardy static int netem_change(struct Qdisc *sch, struct nlattr *opt) 4051da177e4SLinus Torvalds { 4061da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 407b03f4672SPatrick McHardy struct nlattr *tb[TCA_NETEM_MAX + 1]; 4081da177e4SLinus Torvalds struct tc_netem_qopt *qopt; 4091da177e4SLinus Torvalds int ret; 4101da177e4SLinus Torvalds 411b03f4672SPatrick McHardy if (opt == NULL) 4121da177e4SLinus Torvalds return -EINVAL; 4131da177e4SLinus Torvalds 41427a3421eSPatrick McHardy ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy, 41527a3421eSPatrick McHardy qopt, sizeof(*qopt)); 416b03f4672SPatrick McHardy if (ret < 0) 417b03f4672SPatrick McHardy return ret; 418b03f4672SPatrick McHardy 4191da177e4SLinus Torvalds ret = set_fifo_limit(q->qdisc, qopt->limit); 4201da177e4SLinus Torvalds if (ret) { 4211da177e4SLinus Torvalds pr_debug("netem: can't set fifo limit\n"); 4221da177e4SLinus Torvalds return ret; 4231da177e4SLinus Torvalds } 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds q->latency = qopt->latency; 4261da177e4SLinus Torvalds q->jitter = qopt->jitter; 4271da177e4SLinus Torvalds q->limit = qopt->limit; 4281da177e4SLinus Torvalds q->gap = qopt->gap; 4290dca51d3SStephen Hemminger q->counter = 0; 4301da177e4SLinus Torvalds q->loss = qopt->loss; 4311da177e4SLinus Torvalds q->duplicate = qopt->duplicate; 4321da177e4SLinus Torvalds 433bb2f8cc0SStephen Hemminger /* for compatibility with earlier versions. 434bb2f8cc0SStephen Hemminger * if gap is set, need to assume 100% probability 4350dca51d3SStephen Hemminger */ 436a362e0a7SStephen Hemminger if (q->gap) 4370dca51d3SStephen Hemminger q->reorder = ~0; 4380dca51d3SStephen Hemminger 4391e90474cSPatrick McHardy if (tb[TCA_NETEM_CORR]) { 4401e90474cSPatrick McHardy ret = get_correlation(sch, tb[TCA_NETEM_CORR]); 4411da177e4SLinus Torvalds if (ret) 4421da177e4SLinus Torvalds return ret; 4431da177e4SLinus Torvalds } 4441da177e4SLinus Torvalds 4451e90474cSPatrick McHardy if (tb[TCA_NETEM_DELAY_DIST]) { 4461e90474cSPatrick McHardy ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); 4471da177e4SLinus Torvalds if (ret) 4481da177e4SLinus Torvalds return ret; 4491da177e4SLinus Torvalds } 450c865e5d9SStephen Hemminger 4511e90474cSPatrick McHardy if (tb[TCA_NETEM_REORDER]) { 4521e90474cSPatrick McHardy ret = get_reorder(sch, tb[TCA_NETEM_REORDER]); 4530dca51d3SStephen Hemminger if (ret) 4540dca51d3SStephen Hemminger return ret; 4550dca51d3SStephen Hemminger } 4561da177e4SLinus Torvalds 4571e90474cSPatrick McHardy if (tb[TCA_NETEM_CORRUPT]) { 4581e90474cSPatrick McHardy ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 459c865e5d9SStephen Hemminger if (ret) 460c865e5d9SStephen Hemminger return ret; 461c865e5d9SStephen Hemminger } 4621da177e4SLinus Torvalds 4631da177e4SLinus Torvalds return 0; 4641da177e4SLinus Torvalds } 4651da177e4SLinus Torvalds 466300ce174SStephen Hemminger /* 467300ce174SStephen Hemminger * Special case version of FIFO queue for use by netem. 468300ce174SStephen Hemminger * It queues in order based on timestamps in skb's 469300ce174SStephen Hemminger */ 470300ce174SStephen Hemminger struct fifo_sched_data { 471300ce174SStephen Hemminger u32 limit; 472075aa573SStephen Hemminger psched_time_t oldest; 473300ce174SStephen Hemminger }; 474300ce174SStephen Hemminger 475300ce174SStephen Hemminger static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 476300ce174SStephen Hemminger { 477300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 478300ce174SStephen Hemminger struct sk_buff_head *list = &sch->q; 479075aa573SStephen Hemminger psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send; 480300ce174SStephen Hemminger struct sk_buff *skb; 481300ce174SStephen Hemminger 482300ce174SStephen Hemminger if (likely(skb_queue_len(list) < q->limit)) { 483075aa573SStephen Hemminger /* Optimize for add at tail */ 484104e0878SPatrick McHardy if (likely(skb_queue_empty(list) || tnext >= q->oldest)) { 485075aa573SStephen Hemminger q->oldest = tnext; 486075aa573SStephen Hemminger return qdisc_enqueue_tail(nskb, sch); 487075aa573SStephen Hemminger } 488075aa573SStephen Hemminger 489300ce174SStephen Hemminger skb_queue_reverse_walk(list, skb) { 490300ce174SStephen Hemminger const struct netem_skb_cb *cb 491300ce174SStephen Hemminger = (const struct netem_skb_cb *)skb->cb; 492300ce174SStephen Hemminger 493104e0878SPatrick McHardy if (tnext >= cb->time_to_send) 494300ce174SStephen Hemminger break; 495300ce174SStephen Hemminger } 496300ce174SStephen Hemminger 497300ce174SStephen Hemminger __skb_queue_after(list, skb, nskb); 498300ce174SStephen Hemminger 499300ce174SStephen Hemminger sch->qstats.backlog += nskb->len; 500300ce174SStephen Hemminger sch->bstats.bytes += nskb->len; 501300ce174SStephen Hemminger sch->bstats.packets++; 502300ce174SStephen Hemminger 503300ce174SStephen Hemminger return NET_XMIT_SUCCESS; 504300ce174SStephen Hemminger } 505300ce174SStephen Hemminger 506075aa573SStephen Hemminger return qdisc_reshape_fail(nskb, sch); 507300ce174SStephen Hemminger } 508300ce174SStephen Hemminger 5091e90474cSPatrick McHardy static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) 510300ce174SStephen Hemminger { 511300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 512300ce174SStephen Hemminger 513300ce174SStephen Hemminger if (opt) { 5141e90474cSPatrick McHardy struct tc_fifo_qopt *ctl = nla_data(opt); 5151e90474cSPatrick McHardy if (nla_len(opt) < sizeof(*ctl)) 516300ce174SStephen Hemminger return -EINVAL; 517300ce174SStephen Hemminger 518300ce174SStephen Hemminger q->limit = ctl->limit; 519300ce174SStephen Hemminger } else 520300ce174SStephen Hemminger q->limit = max_t(u32, sch->dev->tx_queue_len, 1); 521300ce174SStephen Hemminger 522a084980dSPatrick McHardy q->oldest = PSCHED_PASTPERFECT; 523300ce174SStephen Hemminger return 0; 524300ce174SStephen Hemminger } 525300ce174SStephen Hemminger 526300ce174SStephen Hemminger static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) 527300ce174SStephen Hemminger { 528300ce174SStephen Hemminger struct fifo_sched_data *q = qdisc_priv(sch); 529300ce174SStephen Hemminger struct tc_fifo_qopt opt = { .limit = q->limit }; 530300ce174SStephen Hemminger 5311e90474cSPatrick McHardy NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 532300ce174SStephen Hemminger return skb->len; 533300ce174SStephen Hemminger 5341e90474cSPatrick McHardy nla_put_failure: 535300ce174SStephen Hemminger return -1; 536300ce174SStephen Hemminger } 537300ce174SStephen Hemminger 53820fea08bSEric Dumazet static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { 539300ce174SStephen Hemminger .id = "tfifo", 540300ce174SStephen Hemminger .priv_size = sizeof(struct fifo_sched_data), 541300ce174SStephen Hemminger .enqueue = tfifo_enqueue, 542300ce174SStephen Hemminger .dequeue = qdisc_dequeue_head, 543300ce174SStephen Hemminger .requeue = qdisc_requeue, 544300ce174SStephen Hemminger .drop = qdisc_queue_drop, 545300ce174SStephen Hemminger .init = tfifo_init, 546300ce174SStephen Hemminger .reset = qdisc_reset_queue, 547300ce174SStephen Hemminger .change = tfifo_init, 548300ce174SStephen Hemminger .dump = tfifo_dump, 549300ce174SStephen Hemminger }; 550300ce174SStephen Hemminger 5511e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt) 5521da177e4SLinus Torvalds { 5531da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5541da177e4SLinus Torvalds int ret; 5551da177e4SLinus Torvalds 5561da177e4SLinus Torvalds if (!opt) 5571da177e4SLinus Torvalds return -EINVAL; 5581da177e4SLinus Torvalds 55959cb5c67SPatrick McHardy qdisc_watchdog_init(&q->watchdog, sch); 5601da177e4SLinus Torvalds 5619f9afec4SPatrick McHardy q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, 5629f9afec4SPatrick McHardy TC_H_MAKE(sch->handle, 1)); 5631da177e4SLinus Torvalds if (!q->qdisc) { 5641da177e4SLinus Torvalds pr_debug("netem: qdisc create failed\n"); 5651da177e4SLinus Torvalds return -ENOMEM; 5661da177e4SLinus Torvalds } 5671da177e4SLinus Torvalds 5681da177e4SLinus Torvalds ret = netem_change(sch, opt); 5691da177e4SLinus Torvalds if (ret) { 5701da177e4SLinus Torvalds pr_debug("netem: change failed\n"); 5711da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 5721da177e4SLinus Torvalds } 5731da177e4SLinus Torvalds return ret; 5741da177e4SLinus Torvalds } 5751da177e4SLinus Torvalds 5761da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch) 5771da177e4SLinus Torvalds { 5781da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5791da177e4SLinus Torvalds 58059cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 5811da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 5821da177e4SLinus Torvalds kfree(q->delay_dist); 5831da177e4SLinus Torvalds } 5841da177e4SLinus Torvalds 5851da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 5861da177e4SLinus Torvalds { 5871da177e4SLinus Torvalds const struct netem_sched_data *q = qdisc_priv(sch); 58827a884dcSArnaldo Carvalho de Melo unsigned char *b = skb_tail_pointer(skb); 5891e90474cSPatrick McHardy struct nlattr *nla = (struct nlattr *) b; 5901da177e4SLinus Torvalds struct tc_netem_qopt qopt; 5911da177e4SLinus Torvalds struct tc_netem_corr cor; 5920dca51d3SStephen Hemminger struct tc_netem_reorder reorder; 593c865e5d9SStephen Hemminger struct tc_netem_corrupt corrupt; 5941da177e4SLinus Torvalds 5951da177e4SLinus Torvalds qopt.latency = q->latency; 5961da177e4SLinus Torvalds qopt.jitter = q->jitter; 5971da177e4SLinus Torvalds qopt.limit = q->limit; 5981da177e4SLinus Torvalds qopt.loss = q->loss; 5991da177e4SLinus Torvalds qopt.gap = q->gap; 6001da177e4SLinus Torvalds qopt.duplicate = q->duplicate; 6011e90474cSPatrick McHardy NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds cor.delay_corr = q->delay_cor.rho; 6041da177e4SLinus Torvalds cor.loss_corr = q->loss_cor.rho; 6051da177e4SLinus Torvalds cor.dup_corr = q->dup_cor.rho; 6061e90474cSPatrick McHardy NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 6070dca51d3SStephen Hemminger 6080dca51d3SStephen Hemminger reorder.probability = q->reorder; 6090dca51d3SStephen Hemminger reorder.correlation = q->reorder_cor.rho; 6101e90474cSPatrick McHardy NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); 6110dca51d3SStephen Hemminger 612c865e5d9SStephen Hemminger corrupt.probability = q->corrupt; 613c865e5d9SStephen Hemminger corrupt.correlation = q->corrupt_cor.rho; 6141e90474cSPatrick McHardy NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 615c865e5d9SStephen Hemminger 6161e90474cSPatrick McHardy nla->nla_len = skb_tail_pointer(skb) - b; 6171da177e4SLinus Torvalds 6181da177e4SLinus Torvalds return skb->len; 6191da177e4SLinus Torvalds 6201e90474cSPatrick McHardy nla_put_failure: 621dc5fc579SArnaldo Carvalho de Melo nlmsg_trim(skb, b); 6221da177e4SLinus Torvalds return -1; 6231da177e4SLinus Torvalds } 6241da177e4SLinus Torvalds 6251da177e4SLinus Torvalds static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 6261da177e4SLinus Torvalds struct sk_buff *skb, struct tcmsg *tcm) 6271da177e4SLinus Torvalds { 6281da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6291da177e4SLinus Torvalds 6301da177e4SLinus Torvalds if (cl != 1) /* only one class */ 6311da177e4SLinus Torvalds return -ENOENT; 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds tcm->tcm_handle |= TC_H_MIN(1); 6341da177e4SLinus Torvalds tcm->tcm_info = q->qdisc->handle; 6351da177e4SLinus Torvalds 6361da177e4SLinus Torvalds return 0; 6371da177e4SLinus Torvalds } 6381da177e4SLinus Torvalds 6391da177e4SLinus Torvalds static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 6401da177e4SLinus Torvalds struct Qdisc **old) 6411da177e4SLinus Torvalds { 6421da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6431da177e4SLinus Torvalds 6441da177e4SLinus Torvalds if (new == NULL) 6451da177e4SLinus Torvalds new = &noop_qdisc; 6461da177e4SLinus Torvalds 6471da177e4SLinus Torvalds sch_tree_lock(sch); 6481da177e4SLinus Torvalds *old = xchg(&q->qdisc, new); 6495e50da01SPatrick McHardy qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 6501da177e4SLinus Torvalds qdisc_reset(*old); 6511da177e4SLinus Torvalds sch_tree_unlock(sch); 6521da177e4SLinus Torvalds 6531da177e4SLinus Torvalds return 0; 6541da177e4SLinus Torvalds } 6551da177e4SLinus Torvalds 6561da177e4SLinus Torvalds static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 6571da177e4SLinus Torvalds { 6581da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6591da177e4SLinus Torvalds return q->qdisc; 6601da177e4SLinus Torvalds } 6611da177e4SLinus Torvalds 6621da177e4SLinus Torvalds static unsigned long netem_get(struct Qdisc *sch, u32 classid) 6631da177e4SLinus Torvalds { 6641da177e4SLinus Torvalds return 1; 6651da177e4SLinus Torvalds } 6661da177e4SLinus Torvalds 6671da177e4SLinus Torvalds static void netem_put(struct Qdisc *sch, unsigned long arg) 6681da177e4SLinus Torvalds { 6691da177e4SLinus Torvalds } 6701da177e4SLinus Torvalds 6711da177e4SLinus Torvalds static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 6721e90474cSPatrick McHardy struct nlattr **tca, unsigned long *arg) 6731da177e4SLinus Torvalds { 6741da177e4SLinus Torvalds return -ENOSYS; 6751da177e4SLinus Torvalds } 6761da177e4SLinus Torvalds 6771da177e4SLinus Torvalds static int netem_delete(struct Qdisc *sch, unsigned long arg) 6781da177e4SLinus Torvalds { 6791da177e4SLinus Torvalds return -ENOSYS; 6801da177e4SLinus Torvalds } 6811da177e4SLinus Torvalds 6821da177e4SLinus Torvalds static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 6831da177e4SLinus Torvalds { 6841da177e4SLinus Torvalds if (!walker->stop) { 6851da177e4SLinus Torvalds if (walker->count >= walker->skip) 6861da177e4SLinus Torvalds if (walker->fn(sch, 1, walker) < 0) { 6871da177e4SLinus Torvalds walker->stop = 1; 6881da177e4SLinus Torvalds return; 6891da177e4SLinus Torvalds } 6901da177e4SLinus Torvalds walker->count++; 6911da177e4SLinus Torvalds } 6921da177e4SLinus Torvalds } 6931da177e4SLinus Torvalds 6941da177e4SLinus Torvalds static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) 6951da177e4SLinus Torvalds { 6961da177e4SLinus Torvalds return NULL; 6971da177e4SLinus Torvalds } 6981da177e4SLinus Torvalds 69920fea08bSEric Dumazet static const struct Qdisc_class_ops netem_class_ops = { 7001da177e4SLinus Torvalds .graft = netem_graft, 7011da177e4SLinus Torvalds .leaf = netem_leaf, 7021da177e4SLinus Torvalds .get = netem_get, 7031da177e4SLinus Torvalds .put = netem_put, 7041da177e4SLinus Torvalds .change = netem_change_class, 7051da177e4SLinus Torvalds .delete = netem_delete, 7061da177e4SLinus Torvalds .walk = netem_walk, 7071da177e4SLinus Torvalds .tcf_chain = netem_find_tcf, 7081da177e4SLinus Torvalds .dump = netem_dump_class, 7091da177e4SLinus Torvalds }; 7101da177e4SLinus Torvalds 71120fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 7121da177e4SLinus Torvalds .id = "netem", 7131da177e4SLinus Torvalds .cl_ops = &netem_class_ops, 7141da177e4SLinus Torvalds .priv_size = sizeof(struct netem_sched_data), 7151da177e4SLinus Torvalds .enqueue = netem_enqueue, 7161da177e4SLinus Torvalds .dequeue = netem_dequeue, 7171da177e4SLinus Torvalds .requeue = netem_requeue, 7181da177e4SLinus Torvalds .drop = netem_drop, 7191da177e4SLinus Torvalds .init = netem_init, 7201da177e4SLinus Torvalds .reset = netem_reset, 7211da177e4SLinus Torvalds .destroy = netem_destroy, 7221da177e4SLinus Torvalds .change = netem_change, 7231da177e4SLinus Torvalds .dump = netem_dump, 7241da177e4SLinus Torvalds .owner = THIS_MODULE, 7251da177e4SLinus Torvalds }; 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds static int __init netem_module_init(void) 7291da177e4SLinus Torvalds { 730eb229c4cSStephen Hemminger pr_info("netem: version " VERSION "\n"); 7311da177e4SLinus Torvalds return register_qdisc(&netem_qdisc_ops); 7321da177e4SLinus Torvalds } 7331da177e4SLinus Torvalds static void __exit netem_module_exit(void) 7341da177e4SLinus Torvalds { 7351da177e4SLinus Torvalds unregister_qdisc(&netem_qdisc_ops); 7361da177e4SLinus Torvalds } 7371da177e4SLinus Torvalds module_init(netem_module_init) 7381da177e4SLinus Torvalds module_exit(netem_module_exit) 7391da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 740