11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * net/sched/sch_netem.c Network emulator 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 51da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 61da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 7798b6b19SStephen Hemminger * 2 of the License. 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Many of the algorithms and ideas for this came from 101da177e4SLinus Torvalds * NIST Net which is not copyrighted. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Authors: Stephen Hemminger <shemminger@osdl.org> 131da177e4SLinus Torvalds * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 16b7f080cfSAlexey Dobriyan #include <linux/mm.h> 171da177e4SLinus Torvalds #include <linux/module.h> 185a0e3ad6STejun Heo #include <linux/slab.h> 191da177e4SLinus Torvalds #include <linux/types.h> 201da177e4SLinus Torvalds #include <linux/kernel.h> 211da177e4SLinus Torvalds #include <linux/errno.h> 221da177e4SLinus Torvalds #include <linux/skbuff.h> 2378776d3fSDavid S. Miller #include <linux/vmalloc.h> 241da177e4SLinus Torvalds #include <linux/rtnetlink.h> 2590b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h> 261da177e4SLinus Torvalds 27dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h> 281da177e4SLinus Torvalds #include <net/pkt_sched.h> 291da177e4SLinus Torvalds 30250a65f7Sstephen hemminger #define VERSION "1.3" 31eb229c4cSStephen Hemminger 321da177e4SLinus Torvalds /* Network Emulation Queuing algorithm. 
331da177e4SLinus Torvalds ==================================== 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 361da177e4SLinus Torvalds Network Emulation Tool 371da177e4SLinus Torvalds [2] Luigi Rizzo, DummyNet for FreeBSD 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds ---------------------------------------------------------------- 401da177e4SLinus Torvalds 411da177e4SLinus Torvalds This started out as a simple way to delay outgoing packets to 421da177e4SLinus Torvalds test TCP but has grown to include most of the functionality 431da177e4SLinus Torvalds of a full blown network emulator like NISTnet. It can delay 441da177e4SLinus Torvalds packets and add random jitter (and correlation). The random 451da177e4SLinus Torvalds distribution can be loaded from a table as well to provide 461da177e4SLinus Torvalds normal, Pareto, or experimental curves. Packet loss, 471da177e4SLinus Torvalds duplication, and reordering can also be emulated. 481da177e4SLinus Torvalds 491da177e4SLinus Torvalds This qdisc does not do classification that can be handled in 501da177e4SLinus Torvalds layering other disciplines. It does not need to do bandwidth 511da177e4SLinus Torvalds control either since that can be handled by using token 521da177e4SLinus Torvalds bucket or other rate control. 53661b7972Sstephen hemminger 54661b7972Sstephen hemminger Correlated Loss Generator models 55661b7972Sstephen hemminger 56661b7972Sstephen hemminger Added generation of correlated loss according to the 57661b7972Sstephen hemminger "Gilbert-Elliot" model, a 4-state markov model. 58661b7972Sstephen hemminger 59661b7972Sstephen hemminger References: 60661b7972Sstephen hemminger [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG 61661b7972Sstephen hemminger [2] S. Salsano, F. Ludovici, A. 
   Ordine, "Definition of a general
	 and intuitive loss model for packet networks and its implementation
	 in the Netem module in the Linux kernel", available in [1]

	 Authors: Stefano Salsano <stefano.salsano at uniroma2.it
		  Fabio Ludovici <fabio.ludovici at yahoo.it>
*/

/* Per-qdisc private state for the network emulator. */
struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	/* timer used to wake the qdisc when the head packet's
	 * time_to_send arrives */
	struct qdisc_watchdog watchdog;

	/* base delay and jitter, in psched ticks */
	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	/* loss/duplicate/reorder/corrupt are probabilities scaled to the
	 * full u32 range (0 => never, ~0U => always); they are compared
	 * against get_crandom() draws */
	u32 loss;
	u32 limit;		/* max packets in the internal tfifo */
	u32 counter;		/* packets sent since last reorder event */
	u32 gap;		/* reorder every gap-th packet (0 => off) */
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;		/* bytes/sec; see packet_len_2_sched_time() */
	s32 packet_overhead;	/* per-packet byte overhead added to len */
	u32 cell_size;		/* link-layer cell size (0 => no cells) */
	u32 cell_size_reciprocal; /* cached reciprocal for fast divide */
	s32 cell_overhead;	/* per-cell byte overhead */

	/* correlated random number generator state, one per parameter */
	struct crndstate {
		u32 last;	/* previous output, blended into the next */
		u32 rho;	/* correlation coefficient, scaled to u32 */
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	/* optional user-supplied delay distribution table */
	struct disttable {
		u32  size;
		s16 table[0];	/* variable-size trailing array */
	} *delay_dist;


	/* which loss generator is active (see loss_event()) */
	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

/* Accessor for the netem control block stored in skb->cb,
 * with a size check against the qdisc cb area. */
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
1391da177e4SLinus Torvalds */ 1401da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 1411da177e4SLinus Torvalds { 1421da177e4SLinus Torvalds state->rho = rho; 1431da177e4SLinus Torvalds state->last = net_random(); 1441da177e4SLinus Torvalds } 1451da177e4SLinus Torvalds 1461da177e4SLinus Torvalds /* get_crandom - correlated random number generator 1471da177e4SLinus Torvalds * Next number depends on last value. 1481da177e4SLinus Torvalds * rho is scaled to avoid floating point. 1491da177e4SLinus Torvalds */ 150b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state) 1511da177e4SLinus Torvalds { 1521da177e4SLinus Torvalds u64 value, rho; 1531da177e4SLinus Torvalds unsigned long answer; 1541da177e4SLinus Torvalds 155bb2f8cc0SStephen Hemminger if (state->rho == 0) /* no correlation */ 1561da177e4SLinus Torvalds return net_random(); 1571da177e4SLinus Torvalds 1581da177e4SLinus Torvalds value = net_random(); 1591da177e4SLinus Torvalds rho = (u64)state->rho + 1; 1601da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 1611da177e4SLinus Torvalds state->last = answer; 1621da177e4SLinus Torvalds return answer; 1631da177e4SLinus Torvalds } 1641da177e4SLinus Torvalds 165661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator 166661b7972Sstephen hemminger * Generates losses according to the 4-state Markov chain adopted in 167661b7972Sstephen hemminger * the GI (General and Intuitive) loss model. 
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 *
	 * NOTE(review): all comparisons below are strict; when rnd lands
	 * exactly on a threshold (e.g. rnd == clg->a4 in state 1) no branch
	 * matches and the packet is transmitted with the state unchanged.
	 * Also, the state-1 thresholds a4 and a1 look like absolute values
	 * rather than cumulative ones (a4, a4+a1) — presumably intentional
	 * for this model definition, but worth confirming against [1].
	 */
	switch (clg->state) {
	case 1:
		/* gap period: transmit, with rare isolated or burst losses */
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		/* burst period, currently transmitting */
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		/* burst period, currently losing */
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		/* isolated loss: always return to the gap period */
		clg->state = 1;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
232661b7972Sstephen hemminger */ 233661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q) 234661b7972Sstephen hemminger { 235661b7972Sstephen hemminger struct clgstate *clg = &q->clg; 236661b7972Sstephen hemminger 237661b7972Sstephen hemminger switch (clg->state) { 238661b7972Sstephen hemminger case 1: 239661b7972Sstephen hemminger if (net_random() < clg->a1) 240661b7972Sstephen hemminger clg->state = 2; 241661b7972Sstephen hemminger if (net_random() < clg->a4) 242661b7972Sstephen hemminger return true; 243661b7972Sstephen hemminger case 2: 244661b7972Sstephen hemminger if (net_random() < clg->a2) 245661b7972Sstephen hemminger clg->state = 1; 246661b7972Sstephen hemminger if (clg->a3 > net_random()) 247661b7972Sstephen hemminger return true; 248661b7972Sstephen hemminger } 249661b7972Sstephen hemminger 250661b7972Sstephen hemminger return false; 251661b7972Sstephen hemminger } 252661b7972Sstephen hemminger 253661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q) 254661b7972Sstephen hemminger { 255661b7972Sstephen hemminger switch (q->loss_model) { 256661b7972Sstephen hemminger case CLG_RANDOM: 257661b7972Sstephen hemminger /* Random packet drop 0 => none, ~0 => all */ 258661b7972Sstephen hemminger return q->loss && q->loss >= get_crandom(&q->loss_cor); 259661b7972Sstephen hemminger 260661b7972Sstephen hemminger case CLG_4_STATES: 261661b7972Sstephen hemminger /* 4state loss model algorithm (used also for GI model) 262661b7972Sstephen hemminger * Extracts a value from the markov 4 state loss generator, 263661b7972Sstephen hemminger * if it is 1 drops a packet and if needed writes the event in 264661b7972Sstephen hemminger * the kernel logs 265661b7972Sstephen hemminger */ 266661b7972Sstephen hemminger return loss_4state(q); 267661b7972Sstephen hemminger 268661b7972Sstephen hemminger case CLG_GILB_ELL: 269661b7972Sstephen hemminger /* Gilbert-Elliot loss model algorithm 270661b7972Sstephen hemminger * Extracts a 
 value from the Gilbert-Elliot loss generator,
		 * if it is 1 drops a packet and if needed writes the event in
		 * the kernel logs
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	/* no jitter => deterministic delay */
	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	/* table entry t is a sample in NETEM_DIST_SCALE units; split sigma
	 * into quotient and remainder so the scaling below stays in range,
	 * rounding the remainder part to the nearest tick */
	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma /
NETEM_DIST_SCALE) * t + mu;
}

/* packet_len_2_sched_time - convert a packet length into the time it takes
 * to transmit it at q->rate (bytes/sec), accounting for the configured
 * per-packet overhead and, if cell_size is set, rounding the length up to
 * a whole number of (cell_size + cell_overhead) link-layer cells.
 * Returns the duration in psched ticks.
 */
static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	/* len bytes at rate bytes/sec => len * NSEC_PER_SEC / rate ns */
	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}

/* tfifo_enqueue - insert nskb into the internal queue, kept sorted by
 * ascending time_to_send.  Fast path appends at the tail; otherwise walk
 * backwards to find the insertion point.  Fails via qdisc_reshape_fail()
 * when the queue is at sch->limit.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < sch->limit)) {
		skb = skb_peek_tail(list);
		/* Optimize for add at tail */
		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
			return qdisc_enqueue_tail(nskb, sch);

		skb_queue_reverse_walk(list, skb) {
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				break;
Dumazet } 34850612537SEric Dumazet 34950612537SEric Dumazet __skb_queue_after(list, skb, nskb); 35050612537SEric Dumazet sch->qstats.backlog += qdisc_pkt_len(nskb); 35150612537SEric Dumazet return NET_XMIT_SUCCESS; 35250612537SEric Dumazet } 35350612537SEric Dumazet 35450612537SEric Dumazet return qdisc_reshape_fail(nskb, sch); 35550612537SEric Dumazet } 35650612537SEric Dumazet 3570afb51e7SStephen Hemminger /* 3580afb51e7SStephen Hemminger * Insert one skb into qdisc. 3590afb51e7SStephen Hemminger * Note: parent depends on return value to account for queue length. 3600afb51e7SStephen Hemminger * NET_XMIT_DROP: queue length didn't change. 3610afb51e7SStephen Hemminger * NET_XMIT_SUCCESS: one skb was queued. 3620afb51e7SStephen Hemminger */ 3631da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 3641da177e4SLinus Torvalds { 3651da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 36689e1df74SGuillaume Chazarain /* We don't fill cb now as skb_unshare() may invalidate it */ 36789e1df74SGuillaume Chazarain struct netem_skb_cb *cb; 3680afb51e7SStephen Hemminger struct sk_buff *skb2; 3691da177e4SLinus Torvalds int ret; 3700afb51e7SStephen Hemminger int count = 1; 3711da177e4SLinus Torvalds 3720afb51e7SStephen Hemminger /* Random duplication */ 3730afb51e7SStephen Hemminger if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 3740afb51e7SStephen Hemminger ++count; 3750afb51e7SStephen Hemminger 376661b7972Sstephen hemminger /* Drop packet? */ 377661b7972Sstephen hemminger if (loss_event(q)) 3780afb51e7SStephen Hemminger --count; 3790afb51e7SStephen Hemminger 3800afb51e7SStephen Hemminger if (count == 0) { 3811da177e4SLinus Torvalds sch->qstats.drops++; 3821da177e4SLinus Torvalds kfree_skb(skb); 383c27f339aSJarek Poplawski return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 3841da177e4SLinus Torvalds } 3851da177e4SLinus Torvalds 3864e8a5201SDavid S. Miller skb_orphan(skb); 3874e8a5201SDavid S. 
Miller 3880afb51e7SStephen Hemminger /* 3890afb51e7SStephen Hemminger * If we need to duplicate packet, then re-insert at top of the 3900afb51e7SStephen Hemminger * qdisc tree, since parent queuer expects that only one 3910afb51e7SStephen Hemminger * skb will be queued. 392d5d75cd6SStephen Hemminger */ 3930afb51e7SStephen Hemminger if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 3947698b4fcSDavid S. Miller struct Qdisc *rootq = qdisc_root(sch); 3950afb51e7SStephen Hemminger u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ 3960afb51e7SStephen Hemminger q->duplicate = 0; 397d5d75cd6SStephen Hemminger 3985f86173bSJussi Kivilinna qdisc_enqueue_root(skb2, rootq); 3990afb51e7SStephen Hemminger q->duplicate = dupsave; 4001da177e4SLinus Torvalds } 4011da177e4SLinus Torvalds 402c865e5d9SStephen Hemminger /* 403c865e5d9SStephen Hemminger * Randomized packet corruption. 404c865e5d9SStephen Hemminger * Make copy if needed since we are modifying 405c865e5d9SStephen Hemminger * If packet is going to be hardware checksummed, then 406c865e5d9SStephen Hemminger * do it now in software before we mangle it. 
407c865e5d9SStephen Hemminger */ 408c865e5d9SStephen Hemminger if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 409f64f9e71SJoe Perches if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || 410f64f9e71SJoe Perches (skb->ip_summed == CHECKSUM_PARTIAL && 411f64f9e71SJoe Perches skb_checksum_help(skb))) { 412c865e5d9SStephen Hemminger sch->qstats.drops++; 413c865e5d9SStephen Hemminger return NET_XMIT_DROP; 414c865e5d9SStephen Hemminger } 415c865e5d9SStephen Hemminger 416c865e5d9SStephen Hemminger skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); 417c865e5d9SStephen Hemminger } 418c865e5d9SStephen Hemminger 4195f86173bSJussi Kivilinna cb = netem_skb_cb(skb); 420f64f9e71SJoe Perches if (q->gap == 0 || /* not doing reordering */ 421a42b4799SVijay Subramanian q->counter < q->gap - 1 || /* inside last reordering gap */ 422f64f9e71SJoe Perches q->reorder < get_crandom(&q->reorder_cor)) { 4230f9f32acSStephen Hemminger psched_time_t now; 42407aaa115SStephen Hemminger psched_tdiff_t delay; 42507aaa115SStephen Hemminger 42607aaa115SStephen Hemminger delay = tabledist(q->latency, q->jitter, 42707aaa115SStephen Hemminger &q->delay_cor, q->delay_dist); 42807aaa115SStephen Hemminger 4293bebcda2SPatrick McHardy now = psched_get_time(); 4307bc0f28cSHagen Paul Pfeifer 4317bc0f28cSHagen Paul Pfeifer if (q->rate) { 43250612537SEric Dumazet struct sk_buff_head *list = &sch->q; 4337bc0f28cSHagen Paul Pfeifer 43490b41a1cSHagen Paul Pfeifer delay += packet_len_2_sched_time(skb->len, q); 4357bc0f28cSHagen Paul Pfeifer 4367bc0f28cSHagen Paul Pfeifer if (!skb_queue_empty(list)) { 4377bc0f28cSHagen Paul Pfeifer /* 4387bc0f28cSHagen Paul Pfeifer * Last packet in queue is reference point (now). 4397bc0f28cSHagen Paul Pfeifer * First packet in queue is already in flight, 4407bc0f28cSHagen Paul Pfeifer * calculate this time bonus and substract 4417bc0f28cSHagen Paul Pfeifer * from delay. 
4427bc0f28cSHagen Paul Pfeifer */ 4437bc0f28cSHagen Paul Pfeifer delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; 4447bc0f28cSHagen Paul Pfeifer now = netem_skb_cb(skb_peek_tail(list))->time_to_send; 4457bc0f28cSHagen Paul Pfeifer } 4467bc0f28cSHagen Paul Pfeifer } 4477bc0f28cSHagen Paul Pfeifer 4487c59e25fSPatrick McHardy cb->time_to_send = now + delay; 4491da177e4SLinus Torvalds ++q->counter; 45050612537SEric Dumazet ret = tfifo_enqueue(skb, sch); 4511da177e4SLinus Torvalds } else { 4520dca51d3SStephen Hemminger /* 4530dca51d3SStephen Hemminger * Do re-ordering by putting one out of N packets at the front 4540dca51d3SStephen Hemminger * of the queue. 4550dca51d3SStephen Hemminger */ 4563bebcda2SPatrick McHardy cb->time_to_send = psched_get_time(); 4570dca51d3SStephen Hemminger q->counter = 0; 4588ba25dadSJarek Poplawski 45950612537SEric Dumazet __skb_queue_head(&sch->q, skb); 460eb101924SHagen Paul Pfeifer sch->qstats.backlog += qdisc_pkt_len(skb); 461eb101924SHagen Paul Pfeifer sch->qstats.requeues++; 4628ba25dadSJarek Poplawski ret = NET_XMIT_SUCCESS; 4631da177e4SLinus Torvalds } 4641da177e4SLinus Torvalds 46510f6dfcfSstephen hemminger if (ret != NET_XMIT_SUCCESS) { 46610f6dfcfSstephen hemminger if (net_xmit_drop_count(ret)) { 4671da177e4SLinus Torvalds sch->qstats.drops++; 46810f6dfcfSstephen hemminger return ret; 46910f6dfcfSstephen hemminger } 470378a2f09SJarek Poplawski } 4711da177e4SLinus Torvalds 47210f6dfcfSstephen hemminger return NET_XMIT_SUCCESS; 4731da177e4SLinus Torvalds } 4741da177e4SLinus Torvalds 4751da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc *sch) 4761da177e4SLinus Torvalds { 4771da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 47850612537SEric Dumazet unsigned int len; 4791da177e4SLinus Torvalds 48050612537SEric Dumazet len = qdisc_queue_drop(sch); 48150612537SEric Dumazet if (!len && q->qdisc && q->qdisc->ops->drop) 48250612537SEric Dumazet len = q->qdisc->ops->drop(q->qdisc); 
	if (len)
		sch->qstats.drops++;

	return len;
}

/* netem_dequeue - release the head packet once its time_to_send has
 * arrived.  With a child qdisc configured, ripe packets are first pushed
 * into the child and delivery happens from the child's dequeue; otherwise
 * they are delivered directly.  If the head is not ripe yet, arm the
 * watchdog for its due time.
 */
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* if more time remaining? */
		if (cb->time_to_send <= psched_get_time()) {
			__skb_unlink(skb, &sch->q);
			sch->qstats.backlog -= qdisc_pkt_len(skb);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				/* hand the ripe packet to the child qdisc,
				 * then loop to process the next head */
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		/* head not ripe: the child may still hold deliverable
		 * packets from earlier rounds */
		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	/* tfifo empty: drain anything remaining in the child */
	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

/* netem_reset - flush both the internal tfifo and the child qdisc,
 * and cancel any pending watchdog.
 */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

/* dist_free - release a distribution table allocated by get_dist_table(),
 * which may live in either kmalloc or vmalloc space.
 */
static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	/* try kmalloc first (quietly), fall back to vmalloc for big tables */
	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	/* publish the new table under the qdisc root lock, then free the
	 * old one (now in d after the swap) outside any fast path */
	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

/* get_correlation - load delay/loss/duplication correlation parameters
 * from the TCA_NETEM_CORR attribute.
 */
static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

/* get_reorder - load reorder probability and correlation. */
static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

/* get_corrupt - load corruption probability and correlation. */
static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

/* get_rate - load rate emulation parameters, precomputing the cell-size
 * reciprocal used by packet_len_2_sched_time().
 */
static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
6367bc0f28cSHagen Paul Pfeifer { 6377bc0f28cSHagen Paul Pfeifer struct netem_sched_data *q = qdisc_priv(sch); 6387bc0f28cSHagen Paul Pfeifer const struct tc_netem_rate *r = nla_data(attr); 6397bc0f28cSHagen Paul Pfeifer 6407bc0f28cSHagen Paul Pfeifer q->rate = r->rate; 64190b41a1cSHagen Paul Pfeifer q->packet_overhead = r->packet_overhead; 64290b41a1cSHagen Paul Pfeifer q->cell_size = r->cell_size; 64390b41a1cSHagen Paul Pfeifer if (q->cell_size) 64490b41a1cSHagen Paul Pfeifer q->cell_size_reciprocal = reciprocal_value(q->cell_size); 64590b41a1cSHagen Paul Pfeifer q->cell_overhead = r->cell_overhead; 6467bc0f28cSHagen Paul Pfeifer } 6477bc0f28cSHagen Paul Pfeifer 648661b7972Sstephen hemminger static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) 649661b7972Sstephen hemminger { 650661b7972Sstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 651661b7972Sstephen hemminger const struct nlattr *la; 652661b7972Sstephen hemminger int rem; 653661b7972Sstephen hemminger 654661b7972Sstephen hemminger nla_for_each_nested(la, attr, rem) { 655661b7972Sstephen hemminger u16 type = nla_type(la); 656661b7972Sstephen hemminger 657661b7972Sstephen hemminger switch(type) { 658661b7972Sstephen hemminger case NETEM_LOSS_GI: { 659661b7972Sstephen hemminger const struct tc_netem_gimodel *gi = nla_data(la); 660661b7972Sstephen hemminger 6612494654dSstephen hemminger if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { 662661b7972Sstephen hemminger pr_info("netem: incorrect gi model size\n"); 663661b7972Sstephen hemminger return -EINVAL; 664661b7972Sstephen hemminger } 665661b7972Sstephen hemminger 666661b7972Sstephen hemminger q->loss_model = CLG_4_STATES; 667661b7972Sstephen hemminger 668661b7972Sstephen hemminger q->clg.state = 1; 669661b7972Sstephen hemminger q->clg.a1 = gi->p13; 670661b7972Sstephen hemminger q->clg.a2 = gi->p31; 671661b7972Sstephen hemminger q->clg.a3 = gi->p32; 672661b7972Sstephen hemminger q->clg.a4 = gi->p14; 673661b7972Sstephen 
hemminger q->clg.a5 = gi->p23; 674661b7972Sstephen hemminger break; 675661b7972Sstephen hemminger } 676661b7972Sstephen hemminger 677661b7972Sstephen hemminger case NETEM_LOSS_GE: { 678661b7972Sstephen hemminger const struct tc_netem_gemodel *ge = nla_data(la); 679661b7972Sstephen hemminger 6802494654dSstephen hemminger if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { 6812494654dSstephen hemminger pr_info("netem: incorrect ge model size\n"); 682661b7972Sstephen hemminger return -EINVAL; 683661b7972Sstephen hemminger } 684661b7972Sstephen hemminger 685661b7972Sstephen hemminger q->loss_model = CLG_GILB_ELL; 686661b7972Sstephen hemminger q->clg.state = 1; 687661b7972Sstephen hemminger q->clg.a1 = ge->p; 688661b7972Sstephen hemminger q->clg.a2 = ge->r; 689661b7972Sstephen hemminger q->clg.a3 = ge->h; 690661b7972Sstephen hemminger q->clg.a4 = ge->k1; 691661b7972Sstephen hemminger break; 692661b7972Sstephen hemminger } 693661b7972Sstephen hemminger 694661b7972Sstephen hemminger default: 695661b7972Sstephen hemminger pr_info("netem: unknown loss type %u\n", type); 696661b7972Sstephen hemminger return -EINVAL; 697661b7972Sstephen hemminger } 698661b7972Sstephen hemminger } 699661b7972Sstephen hemminger 700661b7972Sstephen hemminger return 0; 701661b7972Sstephen hemminger } 702661b7972Sstephen hemminger 70327a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 70427a3421eSPatrick McHardy [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 70527a3421eSPatrick McHardy [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 70627a3421eSPatrick McHardy [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 7077bc0f28cSHagen Paul Pfeifer [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, 708661b7972Sstephen hemminger [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 70927a3421eSPatrick McHardy }; 71027a3421eSPatrick McHardy 7112c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, 
struct nlattr *nla, 7122c10b32bSThomas Graf const struct nla_policy *policy, int len) 7132c10b32bSThomas Graf { 7142c10b32bSThomas Graf int nested_len = nla_len(nla) - NLA_ALIGN(len); 7152c10b32bSThomas Graf 716661b7972Sstephen hemminger if (nested_len < 0) { 717661b7972Sstephen hemminger pr_info("netem: invalid attributes len %d\n", nested_len); 7182c10b32bSThomas Graf return -EINVAL; 719661b7972Sstephen hemminger } 720661b7972Sstephen hemminger 7212c10b32bSThomas Graf if (nested_len >= nla_attr_size(0)) 7222c10b32bSThomas Graf return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 7232c10b32bSThomas Graf nested_len, policy); 724661b7972Sstephen hemminger 7252c10b32bSThomas Graf memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 7262c10b32bSThomas Graf return 0; 7272c10b32bSThomas Graf } 7282c10b32bSThomas Graf 729c865e5d9SStephen Hemminger /* Parse netlink message to set options */ 7301e90474cSPatrick McHardy static int netem_change(struct Qdisc *sch, struct nlattr *opt) 7311da177e4SLinus Torvalds { 7321da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 733b03f4672SPatrick McHardy struct nlattr *tb[TCA_NETEM_MAX + 1]; 7341da177e4SLinus Torvalds struct tc_netem_qopt *qopt; 7351da177e4SLinus Torvalds int ret; 7361da177e4SLinus Torvalds 737b03f4672SPatrick McHardy if (opt == NULL) 7381da177e4SLinus Torvalds return -EINVAL; 7391da177e4SLinus Torvalds 7402c10b32bSThomas Graf qopt = nla_data(opt); 7412c10b32bSThomas Graf ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); 742b03f4672SPatrick McHardy if (ret < 0) 743b03f4672SPatrick McHardy return ret; 744b03f4672SPatrick McHardy 74550612537SEric Dumazet sch->limit = qopt->limit; 7461da177e4SLinus Torvalds 7471da177e4SLinus Torvalds q->latency = qopt->latency; 7481da177e4SLinus Torvalds q->jitter = qopt->jitter; 7491da177e4SLinus Torvalds q->limit = qopt->limit; 7501da177e4SLinus Torvalds q->gap = qopt->gap; 7510dca51d3SStephen Hemminger q->counter = 0; 7521da177e4SLinus 
Torvalds q->loss = qopt->loss; 7531da177e4SLinus Torvalds q->duplicate = qopt->duplicate; 7541da177e4SLinus Torvalds 755bb2f8cc0SStephen Hemminger /* for compatibility with earlier versions. 756bb2f8cc0SStephen Hemminger * if gap is set, need to assume 100% probability 7570dca51d3SStephen Hemminger */ 758a362e0a7SStephen Hemminger if (q->gap) 7590dca51d3SStephen Hemminger q->reorder = ~0; 7600dca51d3SStephen Hemminger 761265eb67fSStephen Hemminger if (tb[TCA_NETEM_CORR]) 762265eb67fSStephen Hemminger get_correlation(sch, tb[TCA_NETEM_CORR]); 7631da177e4SLinus Torvalds 7641e90474cSPatrick McHardy if (tb[TCA_NETEM_DELAY_DIST]) { 7651e90474cSPatrick McHardy ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); 7661da177e4SLinus Torvalds if (ret) 7671da177e4SLinus Torvalds return ret; 7681da177e4SLinus Torvalds } 769c865e5d9SStephen Hemminger 770265eb67fSStephen Hemminger if (tb[TCA_NETEM_REORDER]) 771265eb67fSStephen Hemminger get_reorder(sch, tb[TCA_NETEM_REORDER]); 7721da177e4SLinus Torvalds 773265eb67fSStephen Hemminger if (tb[TCA_NETEM_CORRUPT]) 774265eb67fSStephen Hemminger get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 7751da177e4SLinus Torvalds 7767bc0f28cSHagen Paul Pfeifer if (tb[TCA_NETEM_RATE]) 7777bc0f28cSHagen Paul Pfeifer get_rate(sch, tb[TCA_NETEM_RATE]); 7787bc0f28cSHagen Paul Pfeifer 779661b7972Sstephen hemminger q->loss_model = CLG_RANDOM; 780661b7972Sstephen hemminger if (tb[TCA_NETEM_LOSS]) 781661b7972Sstephen hemminger ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); 782661b7972Sstephen hemminger 783661b7972Sstephen hemminger return ret; 7841da177e4SLinus Torvalds } 7851da177e4SLinus Torvalds 7861e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt) 7871da177e4SLinus Torvalds { 7881da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 7891da177e4SLinus Torvalds int ret; 7901da177e4SLinus Torvalds 7911da177e4SLinus Torvalds if (!opt) 7921da177e4SLinus Torvalds return -EINVAL; 7931da177e4SLinus Torvalds 
79459cb5c67SPatrick McHardy qdisc_watchdog_init(&q->watchdog, sch); 7951da177e4SLinus Torvalds 796661b7972Sstephen hemminger q->loss_model = CLG_RANDOM; 7971da177e4SLinus Torvalds ret = netem_change(sch, opt); 79850612537SEric Dumazet if (ret) 799250a65f7Sstephen hemminger pr_info("netem: change failed\n"); 8001da177e4SLinus Torvalds return ret; 8011da177e4SLinus Torvalds } 8021da177e4SLinus Torvalds 8031da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch) 8041da177e4SLinus Torvalds { 8051da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 8061da177e4SLinus Torvalds 80759cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 80850612537SEric Dumazet if (q->qdisc) 8091da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 8106373a9a2Sstephen hemminger dist_free(q->delay_dist); 8111da177e4SLinus Torvalds } 8121da177e4SLinus Torvalds 813661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q, 814661b7972Sstephen hemminger struct sk_buff *skb) 815661b7972Sstephen hemminger { 816661b7972Sstephen hemminger struct nlattr *nest; 817661b7972Sstephen hemminger 818661b7972Sstephen hemminger nest = nla_nest_start(skb, TCA_NETEM_LOSS); 819661b7972Sstephen hemminger if (nest == NULL) 820661b7972Sstephen hemminger goto nla_put_failure; 821661b7972Sstephen hemminger 822661b7972Sstephen hemminger switch (q->loss_model) { 823661b7972Sstephen hemminger case CLG_RANDOM: 824661b7972Sstephen hemminger /* legacy loss model */ 825661b7972Sstephen hemminger nla_nest_cancel(skb, nest); 826661b7972Sstephen hemminger return 0; /* no data */ 827661b7972Sstephen hemminger 828661b7972Sstephen hemminger case CLG_4_STATES: { 829661b7972Sstephen hemminger struct tc_netem_gimodel gi = { 830661b7972Sstephen hemminger .p13 = q->clg.a1, 831661b7972Sstephen hemminger .p31 = q->clg.a2, 832661b7972Sstephen hemminger .p32 = q->clg.a3, 833661b7972Sstephen hemminger .p14 = q->clg.a4, 834661b7972Sstephen hemminger .p23 = q->clg.a5, 
835661b7972Sstephen hemminger }; 836661b7972Sstephen hemminger 837*1b34ec43SDavid S. Miller if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi)) 838*1b34ec43SDavid S. Miller goto nla_put_failure; 839661b7972Sstephen hemminger break; 840661b7972Sstephen hemminger } 841661b7972Sstephen hemminger case CLG_GILB_ELL: { 842661b7972Sstephen hemminger struct tc_netem_gemodel ge = { 843661b7972Sstephen hemminger .p = q->clg.a1, 844661b7972Sstephen hemminger .r = q->clg.a2, 845661b7972Sstephen hemminger .h = q->clg.a3, 846661b7972Sstephen hemminger .k1 = q->clg.a4, 847661b7972Sstephen hemminger }; 848661b7972Sstephen hemminger 849*1b34ec43SDavid S. Miller if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge)) 850*1b34ec43SDavid S. Miller goto nla_put_failure; 851661b7972Sstephen hemminger break; 852661b7972Sstephen hemminger } 853661b7972Sstephen hemminger } 854661b7972Sstephen hemminger 855661b7972Sstephen hemminger nla_nest_end(skb, nest); 856661b7972Sstephen hemminger return 0; 857661b7972Sstephen hemminger 858661b7972Sstephen hemminger nla_put_failure: 859661b7972Sstephen hemminger nla_nest_cancel(skb, nest); 860661b7972Sstephen hemminger return -1; 861661b7972Sstephen hemminger } 862661b7972Sstephen hemminger 8631da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 8641da177e4SLinus Torvalds { 8651da177e4SLinus Torvalds const struct netem_sched_data *q = qdisc_priv(sch); 866861d7f74Sstephen hemminger struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); 8671da177e4SLinus Torvalds struct tc_netem_qopt qopt; 8681da177e4SLinus Torvalds struct tc_netem_corr cor; 8690dca51d3SStephen Hemminger struct tc_netem_reorder reorder; 870c865e5d9SStephen Hemminger struct tc_netem_corrupt corrupt; 8717bc0f28cSHagen Paul Pfeifer struct tc_netem_rate rate; 8721da177e4SLinus Torvalds 8731da177e4SLinus Torvalds qopt.latency = q->latency; 8741da177e4SLinus Torvalds qopt.jitter = q->jitter; 8751da177e4SLinus Torvalds qopt.limit = q->limit; 8761da177e4SLinus 
Torvalds qopt.loss = q->loss; 8771da177e4SLinus Torvalds qopt.gap = q->gap; 8781da177e4SLinus Torvalds qopt.duplicate = q->duplicate; 879*1b34ec43SDavid S. Miller if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 880*1b34ec43SDavid S. Miller goto nla_put_failure; 8811da177e4SLinus Torvalds 8821da177e4SLinus Torvalds cor.delay_corr = q->delay_cor.rho; 8831da177e4SLinus Torvalds cor.loss_corr = q->loss_cor.rho; 8841da177e4SLinus Torvalds cor.dup_corr = q->dup_cor.rho; 885*1b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor)) 886*1b34ec43SDavid S. Miller goto nla_put_failure; 8870dca51d3SStephen Hemminger 8880dca51d3SStephen Hemminger reorder.probability = q->reorder; 8890dca51d3SStephen Hemminger reorder.correlation = q->reorder_cor.rho; 890*1b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder)) 891*1b34ec43SDavid S. Miller goto nla_put_failure; 8920dca51d3SStephen Hemminger 893c865e5d9SStephen Hemminger corrupt.probability = q->corrupt; 894c865e5d9SStephen Hemminger corrupt.correlation = q->corrupt_cor.rho; 895*1b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) 896*1b34ec43SDavid S. Miller goto nla_put_failure; 897c865e5d9SStephen Hemminger 8987bc0f28cSHagen Paul Pfeifer rate.rate = q->rate; 89990b41a1cSHagen Paul Pfeifer rate.packet_overhead = q->packet_overhead; 90090b41a1cSHagen Paul Pfeifer rate.cell_size = q->cell_size; 90190b41a1cSHagen Paul Pfeifer rate.cell_overhead = q->cell_overhead; 902*1b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate)) 903*1b34ec43SDavid S. 
Miller goto nla_put_failure; 9047bc0f28cSHagen Paul Pfeifer 905661b7972Sstephen hemminger if (dump_loss_model(q, skb) != 0) 906661b7972Sstephen hemminger goto nla_put_failure; 907661b7972Sstephen hemminger 908861d7f74Sstephen hemminger return nla_nest_end(skb, nla); 9091da177e4SLinus Torvalds 9101e90474cSPatrick McHardy nla_put_failure: 911861d7f74Sstephen hemminger nlmsg_trim(skb, nla); 9121da177e4SLinus Torvalds return -1; 9131da177e4SLinus Torvalds } 9141da177e4SLinus Torvalds 91510f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 91610f6dfcfSstephen hemminger struct sk_buff *skb, struct tcmsg *tcm) 91710f6dfcfSstephen hemminger { 91810f6dfcfSstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 91910f6dfcfSstephen hemminger 92050612537SEric Dumazet if (cl != 1 || !q->qdisc) /* only one class */ 92110f6dfcfSstephen hemminger return -ENOENT; 92210f6dfcfSstephen hemminger 92310f6dfcfSstephen hemminger tcm->tcm_handle |= TC_H_MIN(1); 92410f6dfcfSstephen hemminger tcm->tcm_info = q->qdisc->handle; 92510f6dfcfSstephen hemminger 92610f6dfcfSstephen hemminger return 0; 92710f6dfcfSstephen hemminger } 92810f6dfcfSstephen hemminger 92910f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 93010f6dfcfSstephen hemminger struct Qdisc **old) 93110f6dfcfSstephen hemminger { 93210f6dfcfSstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 93310f6dfcfSstephen hemminger 93410f6dfcfSstephen hemminger sch_tree_lock(sch); 93510f6dfcfSstephen hemminger *old = q->qdisc; 93610f6dfcfSstephen hemminger q->qdisc = new; 93750612537SEric Dumazet if (*old) { 93810f6dfcfSstephen hemminger qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 93910f6dfcfSstephen hemminger qdisc_reset(*old); 94050612537SEric Dumazet } 94110f6dfcfSstephen hemminger sch_tree_unlock(sch); 94210f6dfcfSstephen hemminger 94310f6dfcfSstephen hemminger return 0; 94410f6dfcfSstephen hemminger } 94510f6dfcfSstephen 
hemminger 94610f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 94710f6dfcfSstephen hemminger { 94810f6dfcfSstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 94910f6dfcfSstephen hemminger return q->qdisc; 95010f6dfcfSstephen hemminger } 95110f6dfcfSstephen hemminger 95210f6dfcfSstephen hemminger static unsigned long netem_get(struct Qdisc *sch, u32 classid) 95310f6dfcfSstephen hemminger { 95410f6dfcfSstephen hemminger return 1; 95510f6dfcfSstephen hemminger } 95610f6dfcfSstephen hemminger 95710f6dfcfSstephen hemminger static void netem_put(struct Qdisc *sch, unsigned long arg) 95810f6dfcfSstephen hemminger { 95910f6dfcfSstephen hemminger } 96010f6dfcfSstephen hemminger 96110f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 96210f6dfcfSstephen hemminger { 96310f6dfcfSstephen hemminger if (!walker->stop) { 96410f6dfcfSstephen hemminger if (walker->count >= walker->skip) 96510f6dfcfSstephen hemminger if (walker->fn(sch, 1, walker) < 0) { 96610f6dfcfSstephen hemminger walker->stop = 1; 96710f6dfcfSstephen hemminger return; 96810f6dfcfSstephen hemminger } 96910f6dfcfSstephen hemminger walker->count++; 97010f6dfcfSstephen hemminger } 97110f6dfcfSstephen hemminger } 97210f6dfcfSstephen hemminger 97310f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = { 97410f6dfcfSstephen hemminger .graft = netem_graft, 97510f6dfcfSstephen hemminger .leaf = netem_leaf, 97610f6dfcfSstephen hemminger .get = netem_get, 97710f6dfcfSstephen hemminger .put = netem_put, 97810f6dfcfSstephen hemminger .walk = netem_walk, 97910f6dfcfSstephen hemminger .dump = netem_dump_class, 98010f6dfcfSstephen hemminger }; 98110f6dfcfSstephen hemminger 98220fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 9831da177e4SLinus Torvalds .id = "netem", 98410f6dfcfSstephen hemminger .cl_ops = &netem_class_ops, 9851da177e4SLinus Torvalds .priv_size = 
sizeof(struct netem_sched_data), 9861da177e4SLinus Torvalds .enqueue = netem_enqueue, 9871da177e4SLinus Torvalds .dequeue = netem_dequeue, 98877be155cSJarek Poplawski .peek = qdisc_peek_dequeued, 9891da177e4SLinus Torvalds .drop = netem_drop, 9901da177e4SLinus Torvalds .init = netem_init, 9911da177e4SLinus Torvalds .reset = netem_reset, 9921da177e4SLinus Torvalds .destroy = netem_destroy, 9931da177e4SLinus Torvalds .change = netem_change, 9941da177e4SLinus Torvalds .dump = netem_dump, 9951da177e4SLinus Torvalds .owner = THIS_MODULE, 9961da177e4SLinus Torvalds }; 9971da177e4SLinus Torvalds 9981da177e4SLinus Torvalds 9991da177e4SLinus Torvalds static int __init netem_module_init(void) 10001da177e4SLinus Torvalds { 1001eb229c4cSStephen Hemminger pr_info("netem: version " VERSION "\n"); 10021da177e4SLinus Torvalds return register_qdisc(&netem_qdisc_ops); 10031da177e4SLinus Torvalds } 10041da177e4SLinus Torvalds static void __exit netem_module_exit(void) 10051da177e4SLinus Torvalds { 10061da177e4SLinus Torvalds unregister_qdisc(&netem_qdisc_ops); 10071da177e4SLinus Torvalds } 10081da177e4SLinus Torvalds module_init(netem_module_init) 10091da177e4SLinus Torvalds module_exit(netem_module_exit) 10101da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1011