11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * net/sched/sch_netem.c Network emulator 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 51da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 61da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 7798b6b19SStephen Hemminger * 2 of the License. 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Many of the algorithms and ideas for this came from 101da177e4SLinus Torvalds * NIST Net which is not copyrighted. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Authors: Stephen Hemminger <shemminger@osdl.org> 131da177e4SLinus Torvalds * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <linux/module.h> 175a0e3ad6STejun Heo #include <linux/slab.h> 181da177e4SLinus Torvalds #include <linux/types.h> 191da177e4SLinus Torvalds #include <linux/kernel.h> 201da177e4SLinus Torvalds #include <linux/errno.h> 211da177e4SLinus Torvalds #include <linux/skbuff.h> 221da177e4SLinus Torvalds #include <linux/rtnetlink.h> 231da177e4SLinus Torvalds 24dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h> 251da177e4SLinus Torvalds #include <net/pkt_sched.h> 261da177e4SLinus Torvalds 27c865e5d9SStephen Hemminger #define VERSION "1.2" 28eb229c4cSStephen Hemminger 291da177e4SLinus Torvalds /* Network Emulation Queuing algorithm. 
301da177e4SLinus Torvalds ==================================== 311da177e4SLinus Torvalds 321da177e4SLinus Torvalds Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 331da177e4SLinus Torvalds Network Emulation Tool 341da177e4SLinus Torvalds [2] Luigi Rizzo, DummyNet for FreeBSD 351da177e4SLinus Torvalds 361da177e4SLinus Torvalds ---------------------------------------------------------------- 371da177e4SLinus Torvalds 381da177e4SLinus Torvalds This started out as a simple way to delay outgoing packets to 391da177e4SLinus Torvalds test TCP but has grown to include most of the functionality 401da177e4SLinus Torvalds of a full blown network emulator like NISTnet. It can delay 411da177e4SLinus Torvalds packets and add random jitter (and correlation). The random 421da177e4SLinus Torvalds distribution can be loaded from a table as well to provide 431da177e4SLinus Torvalds normal, Pareto, or experimental curves. Packet loss, 441da177e4SLinus Torvalds duplication, and reordering can also be emulated. 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds This qdisc does not do classification that can be handled in 471da177e4SLinus Torvalds layering other disciplines. It does not need to do bandwidth 481da177e4SLinus Torvalds control either since that can be handled by using token 491da177e4SLinus Torvalds bucket or other rate control. 
501da177e4SLinus Torvalds */ 511da177e4SLinus Torvalds 521da177e4SLinus Torvalds struct netem_sched_data { 531da177e4SLinus Torvalds struct Qdisc *qdisc; 5459cb5c67SPatrick McHardy struct qdisc_watchdog watchdog; 551da177e4SLinus Torvalds 56b407621cSStephen Hemminger psched_tdiff_t latency; 57b407621cSStephen Hemminger psched_tdiff_t jitter; 58b407621cSStephen Hemminger 591da177e4SLinus Torvalds u32 loss; 601da177e4SLinus Torvalds u32 limit; 611da177e4SLinus Torvalds u32 counter; 621da177e4SLinus Torvalds u32 gap; 631da177e4SLinus Torvalds u32 duplicate; 640dca51d3SStephen Hemminger u32 reorder; 65c865e5d9SStephen Hemminger u32 corrupt; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds struct crndstate { 68b407621cSStephen Hemminger u32 last; 69b407621cSStephen Hemminger u32 rho; 70c865e5d9SStephen Hemminger } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 711da177e4SLinus Torvalds 721da177e4SLinus Torvalds struct disttable { 731da177e4SLinus Torvalds u32 size; 741da177e4SLinus Torvalds s16 table[0]; 751da177e4SLinus Torvalds } *delay_dist; 761da177e4SLinus Torvalds }; 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds /* Time stamp put into socket buffer control block */ 791da177e4SLinus Torvalds struct netem_skb_cb { 801da177e4SLinus Torvalds psched_time_t time_to_send; 811da177e4SLinus Torvalds }; 821da177e4SLinus Torvalds 835f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 845f86173bSJussi Kivilinna { 85175f9c1bSJussi Kivilinna BUILD_BUG_ON(sizeof(skb->cb) < 86175f9c1bSJussi Kivilinna sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); 87175f9c1bSJussi Kivilinna return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 885f86173bSJussi Kivilinna } 895f86173bSJussi Kivilinna 901da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator 911da177e4SLinus Torvalds * Use entropy source for initial seed. 
921da177e4SLinus Torvalds */ 931da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 941da177e4SLinus Torvalds { 951da177e4SLinus Torvalds state->rho = rho; 961da177e4SLinus Torvalds state->last = net_random(); 971da177e4SLinus Torvalds } 981da177e4SLinus Torvalds 991da177e4SLinus Torvalds /* get_crandom - correlated random number generator 1001da177e4SLinus Torvalds * Next number depends on last value. 1011da177e4SLinus Torvalds * rho is scaled to avoid floating point. 1021da177e4SLinus Torvalds */ 103b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state) 1041da177e4SLinus Torvalds { 1051da177e4SLinus Torvalds u64 value, rho; 1061da177e4SLinus Torvalds unsigned long answer; 1071da177e4SLinus Torvalds 108bb2f8cc0SStephen Hemminger if (state->rho == 0) /* no correlation */ 1091da177e4SLinus Torvalds return net_random(); 1101da177e4SLinus Torvalds 1111da177e4SLinus Torvalds value = net_random(); 1121da177e4SLinus Torvalds rho = (u64)state->rho + 1; 1131da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 1141da177e4SLinus Torvalds state->last = answer; 1151da177e4SLinus Torvalds return answer; 1161da177e4SLinus Torvalds } 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and 1191da177e4SLinus Torvalds * std deviation sigma. Uses table lookup to approximate the desired 1201da177e4SLinus Torvalds * distribution, and a uniformly-distributed pseudo-random source. 
1211da177e4SLinus Torvalds */ 122b407621cSStephen Hemminger static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 123b407621cSStephen Hemminger struct crndstate *state, 124b407621cSStephen Hemminger const struct disttable *dist) 1251da177e4SLinus Torvalds { 126b407621cSStephen Hemminger psched_tdiff_t x; 127b407621cSStephen Hemminger long t; 128b407621cSStephen Hemminger u32 rnd; 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds if (sigma == 0) 1311da177e4SLinus Torvalds return mu; 1321da177e4SLinus Torvalds 1331da177e4SLinus Torvalds rnd = get_crandom(state); 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds /* default uniform distribution */ 1361da177e4SLinus Torvalds if (dist == NULL) 1371da177e4SLinus Torvalds return (rnd % (2*sigma)) - sigma + mu; 1381da177e4SLinus Torvalds 1391da177e4SLinus Torvalds t = dist->table[rnd % dist->size]; 1401da177e4SLinus Torvalds x = (sigma % NETEM_DIST_SCALE) * t; 1411da177e4SLinus Torvalds if (x >= 0) 1421da177e4SLinus Torvalds x += NETEM_DIST_SCALE/2; 1431da177e4SLinus Torvalds else 1441da177e4SLinus Torvalds x -= NETEM_DIST_SCALE/2; 1451da177e4SLinus Torvalds 1461da177e4SLinus Torvalds return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 1471da177e4SLinus Torvalds } 1481da177e4SLinus Torvalds 1490afb51e7SStephen Hemminger /* 1500afb51e7SStephen Hemminger * Insert one skb into qdisc. 1510afb51e7SStephen Hemminger * Note: parent depends on return value to account for queue length. 1520afb51e7SStephen Hemminger * NET_XMIT_DROP: queue length didn't change. 1530afb51e7SStephen Hemminger * NET_XMIT_SUCCESS: one skb was queued. 
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;	/* copies of skb to queue: 0 => drop, 2 => duplicate */

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	if (count == 0) {
		/* emulated loss: count as drop locally, but report bypass
		 * success so the parent does not bump its own drop stats */
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		/* flip one random bit in a random byte of the linear head */
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();
		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		/* bypass the child's enqueue: jump straight to queue head,
		 * accounting backlog/requeues on the child ourselves */
		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
	} else if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
	}

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}

/* Drop one packet from the child qdisc, if its ops support it;
 * returns the dropped packet's length, 0 if nothing was dropped. */
static unsigned int netem_drop(struct Qdisc* sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q =
qdisc_priv(sch);
	struct sk_buff *skb;

	/* watchdog already armed; nothing is due yet */
	if (sch->flags & TCQ_F_THROTTLED)
		return NULL;

	/* peek first: only commit the dequeue once the timestamp is due */
	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* if more time remaining? */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif
			pr_debug("netem_dequeue: return skb=%p\n", skb);
			sch->q.qlen--;
			return skb;
		}

		/* head not due yet: arm timer to retry at its send time */
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}

/* Flush the child qdisc and cancel any pending wakeup. */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;

	/* cap table size to keep the kmalloc bounded */
	if (n > 65536)
		return -EINVAL;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	/* swap in the new table under the qdisc tree lock so tabledist()
	 * never sees a half-replaced pointer */
	spin_lock_bh(root_lock);
	kfree(q->delay_dist);
	q->delay_dist = d;
	spin_unlock_bh(root_lock);
	return 0;
}

/* Apply TCA_NETEM_CORR: seed the per-knob correlated RNG states. */
static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

/* Apply TCA_NETEM_REORDER: probability and correlation. */
static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

/* Apply TCA_NETEM_CORRUPT: probability and correlation. */
static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
};

/* Parse the nested attributes that follow the fixed-size tc_netem_qopt
 * header (len bytes) inside nla; clears tb[] if nothing is nested. */
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0)
		return -EINVAL;
	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);
	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	/* fixed header first, then any nested attributes after it */
	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* resize the inner tfifo before committing any other option */
	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	return 0;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues in order based on timestamps in skb's
 */
struct fifo_sched_data {
	u32 limit;		/* max queue length in packets */
	psched_time_t oldest;	/* largest time_to_send seen at the tail */
};

/* Insert nskb keeping the queue sorted by cb->time_to_send (ascending). */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		/* walk backwards to the first entry that sends no later
		 * than nskb, and insert right after it */
		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);
		sch->bstats.bytes += qdisc_pkt_len(nskb);
		sch->bstats.packets++;

		return NET_XMIT_SUCCESS;
	}

	/* over limit: let the generic reshape/drop path handle it */
	return qdisc_reshape_fail(nskb, sch);
}

/* Set queue limit from netlink options, defaulting to tx_queue_len. */
static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct fifo_sched_data *q = qdisc_priv(sch);

	if (opt) {
		struct tc_fifo_qopt *ctl = nla_data(opt);
		if (nla_len(opt) < sizeof(*ctl))
			return -EINVAL;

		q->limit = ctl->limit;
	} else
		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);

	q->oldest = PSCHED_PASTPERFECT;
	return 0;
}

/* Dump the limit back to user space as a tc_fifo_qopt. */
static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};

/* Create the inner tfifo child, then apply the initial options. */
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		/* undo the child creation on option failure */
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

/* Tear down: timer, child qdisc, and the optional delay table. */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}

/* Dump current configuration back to user space; the delay distribution
 * table is not dumped (only its effects are observable). */
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nla = (struct nlattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	/* fix up the outer attribute length now that all nests are in */
	nla->nla_len = skb_tail_pointer(skb) - b;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");