/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
351da177e4SLinus Torvalds ==================================== 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 381da177e4SLinus Torvalds Network Emulation Tool 391da177e4SLinus Torvalds [2] Luigi Rizzo, DummyNet for FreeBSD 401da177e4SLinus Torvalds 411da177e4SLinus Torvalds ---------------------------------------------------------------- 421da177e4SLinus Torvalds 431da177e4SLinus Torvalds This started out as a simple way to delay outgoing packets to 441da177e4SLinus Torvalds test TCP but has grown to include most of the functionality 451da177e4SLinus Torvalds of a full blown network emulator like NISTnet. It can delay 461da177e4SLinus Torvalds packets and add random jitter (and correlation). The random 471da177e4SLinus Torvalds distribution can be loaded from a table as well to provide 481da177e4SLinus Torvalds normal, Pareto, or experimental curves. Packet loss, 491da177e4SLinus Torvalds duplication, and reordering can also be emulated. 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds This qdisc does not do classification that can be handled in 521da177e4SLinus Torvalds layering other disciplines. It does not need to do bandwidth 531da177e4SLinus Torvalds control either since that can be handled by using token 541da177e4SLinus Torvalds bucket or other rate control. 55661b7972Sstephen hemminger 56661b7972Sstephen hemminger Correlated Loss Generator models 57661b7972Sstephen hemminger 58661b7972Sstephen hemminger Added generation of correlated loss according to the 59661b7972Sstephen hemminger "Gilbert-Elliot" model, a 4-state markov model. 60661b7972Sstephen hemminger 61661b7972Sstephen hemminger References: 62661b7972Sstephen hemminger [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG 63661b7972Sstephen hemminger [2] S. Salsano, F. Ludovici, A. 
Ordine, "Definition of a general 64661b7972Sstephen hemminger and intuitive loss model for packet networks and its implementation 65661b7972Sstephen hemminger in the Netem module in the Linux kernel", available in [1] 66661b7972Sstephen hemminger 67661b7972Sstephen hemminger Authors: Stefano Salsano <stefano.salsano at uniroma2.it 68661b7972Sstephen hemminger Fabio Ludovici <fabio.ludovici at yahoo.it> 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds struct netem_sched_data { 72aec0a40aSEric Dumazet /* internal t(ime)fifo qdisc uses t_root and sch->limit */ 73aec0a40aSEric Dumazet struct rb_root t_root; 7450612537SEric Dumazet 7550612537SEric Dumazet /* optional qdisc for classful handling (NULL at netem init) */ 761da177e4SLinus Torvalds struct Qdisc *qdisc; 7750612537SEric Dumazet 7859cb5c67SPatrick McHardy struct qdisc_watchdog watchdog; 791da177e4SLinus Torvalds 80b407621cSStephen Hemminger psched_tdiff_t latency; 81b407621cSStephen Hemminger psched_tdiff_t jitter; 82b407621cSStephen Hemminger 831da177e4SLinus Torvalds u32 loss; 84e4ae004bSEric Dumazet u32 ecn; 851da177e4SLinus Torvalds u32 limit; 861da177e4SLinus Torvalds u32 counter; 871da177e4SLinus Torvalds u32 gap; 881da177e4SLinus Torvalds u32 duplicate; 890dca51d3SStephen Hemminger u32 reorder; 90c865e5d9SStephen Hemminger u32 corrupt; 916a031f67SYang Yingliang u64 rate; 9290b41a1cSHagen Paul Pfeifer s32 packet_overhead; 9390b41a1cSHagen Paul Pfeifer u32 cell_size; 9490b41a1cSHagen Paul Pfeifer u32 cell_size_reciprocal; 9590b41a1cSHagen Paul Pfeifer s32 cell_overhead; 961da177e4SLinus Torvalds 971da177e4SLinus Torvalds struct crndstate { 98b407621cSStephen Hemminger u32 last; 99b407621cSStephen Hemminger u32 rho; 100c865e5d9SStephen Hemminger } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 1011da177e4SLinus Torvalds 1021da177e4SLinus Torvalds struct disttable { 1031da177e4SLinus Torvalds u32 size; 1041da177e4SLinus Torvalds s16 table[0]; 1051da177e4SLinus 
Torvalds } *delay_dist; 106661b7972Sstephen hemminger 107661b7972Sstephen hemminger enum { 108661b7972Sstephen hemminger CLG_RANDOM, 109661b7972Sstephen hemminger CLG_4_STATES, 110661b7972Sstephen hemminger CLG_GILB_ELL, 111661b7972Sstephen hemminger } loss_model; 112661b7972Sstephen hemminger 113*a6e2fe17SYang Yingliang enum { 114*a6e2fe17SYang Yingliang TX_IN_GAP_PERIOD = 1, 115*a6e2fe17SYang Yingliang TX_IN_BURST_PERIOD, 116*a6e2fe17SYang Yingliang LOST_IN_GAP_PERIOD, 117*a6e2fe17SYang Yingliang LOST_IN_BURST_PERIOD, 118*a6e2fe17SYang Yingliang } _4_state_model; 119*a6e2fe17SYang Yingliang 120661b7972Sstephen hemminger /* Correlated Loss Generation models */ 121661b7972Sstephen hemminger struct clgstate { 122661b7972Sstephen hemminger /* state of the Markov chain */ 123661b7972Sstephen hemminger u8 state; 124661b7972Sstephen hemminger 125661b7972Sstephen hemminger /* 4-states and Gilbert-Elliot models */ 126661b7972Sstephen hemminger u32 a1; /* p13 for 4-states or p for GE */ 127661b7972Sstephen hemminger u32 a2; /* p31 for 4-states or r for GE */ 128661b7972Sstephen hemminger u32 a3; /* p32 for 4-states or h for GE */ 129661b7972Sstephen hemminger u32 a4; /* p14 for 4-states or 1-k for GE */ 130661b7972Sstephen hemminger u32 a5; /* p23 used only in 4-states */ 131661b7972Sstephen hemminger } clg; 132661b7972Sstephen hemminger 1331da177e4SLinus Torvalds }; 1341da177e4SLinus Torvalds 13550612537SEric Dumazet /* Time stamp put into socket buffer control block 13650612537SEric Dumazet * Only valid when skbs are in our internal t(ime)fifo queue. 13750612537SEric Dumazet */ 1381da177e4SLinus Torvalds struct netem_skb_cb { 1391da177e4SLinus Torvalds psched_time_t time_to_send; 140aec0a40aSEric Dumazet ktime_t tstamp_save; 1411da177e4SLinus Torvalds }; 1421da177e4SLinus Torvalds 143aec0a40aSEric Dumazet /* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp 144aec0a40aSEric Dumazet * to hold a rb_node structure. 
145aec0a40aSEric Dumazet * 146aec0a40aSEric Dumazet * If struct sk_buff layout is changed, the following checks will complain. 147aec0a40aSEric Dumazet */ 148aec0a40aSEric Dumazet static struct rb_node *netem_rb_node(struct sk_buff *skb) 149aec0a40aSEric Dumazet { 150aec0a40aSEric Dumazet BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0); 151aec0a40aSEric Dumazet BUILD_BUG_ON(offsetof(struct sk_buff, prev) != 152aec0a40aSEric Dumazet offsetof(struct sk_buff, next) + sizeof(skb->next)); 153aec0a40aSEric Dumazet BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) != 154aec0a40aSEric Dumazet offsetof(struct sk_buff, prev) + sizeof(skb->prev)); 155aec0a40aSEric Dumazet BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) + 156aec0a40aSEric Dumazet sizeof(skb->prev) + 157aec0a40aSEric Dumazet sizeof(skb->tstamp)); 158aec0a40aSEric Dumazet return (struct rb_node *)&skb->next; 159aec0a40aSEric Dumazet } 160aec0a40aSEric Dumazet 161aec0a40aSEric Dumazet static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) 162aec0a40aSEric Dumazet { 163aec0a40aSEric Dumazet return (struct sk_buff *)rb; 164aec0a40aSEric Dumazet } 165aec0a40aSEric Dumazet 1665f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 1675f86173bSJussi Kivilinna { 168aec0a40aSEric Dumazet /* we assume we can use skb next/prev/tstamp as storage for rb_node */ 16916bda13dSDavid S. Miller qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); 170175f9c1bSJussi Kivilinna return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 1715f86173bSJussi Kivilinna } 1725f86173bSJussi Kivilinna 1731da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator 1741da177e4SLinus Torvalds * Use entropy source for initial seed. 
1751da177e4SLinus Torvalds */ 1761da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho) 1771da177e4SLinus Torvalds { 1781da177e4SLinus Torvalds state->rho = rho; 17963862b5bSAruna-Hewapathirane state->last = prandom_u32(); 1801da177e4SLinus Torvalds } 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds /* get_crandom - correlated random number generator 1831da177e4SLinus Torvalds * Next number depends on last value. 1841da177e4SLinus Torvalds * rho is scaled to avoid floating point. 1851da177e4SLinus Torvalds */ 186b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state) 1871da177e4SLinus Torvalds { 1881da177e4SLinus Torvalds u64 value, rho; 1891da177e4SLinus Torvalds unsigned long answer; 1901da177e4SLinus Torvalds 191bb2f8cc0SStephen Hemminger if (state->rho == 0) /* no correlation */ 19263862b5bSAruna-Hewapathirane return prandom_u32(); 1931da177e4SLinus Torvalds 19463862b5bSAruna-Hewapathirane value = prandom_u32(); 1951da177e4SLinus Torvalds rho = (u64)state->rho + 1; 1961da177e4SLinus Torvalds answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 1971da177e4SLinus Torvalds state->last = answer; 1981da177e4SLinus Torvalds return answer; 1991da177e4SLinus Torvalds } 2001da177e4SLinus Torvalds 201661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator 202661b7972Sstephen hemminger * Generates losses according to the 4-state Markov chain adopted in 203661b7972Sstephen hemminger * the GI (General and Intuitive) loss model. 
204661b7972Sstephen hemminger */ 205661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q) 206661b7972Sstephen hemminger { 207661b7972Sstephen hemminger struct clgstate *clg = &q->clg; 20863862b5bSAruna-Hewapathirane u32 rnd = prandom_u32(); 209661b7972Sstephen hemminger 210661b7972Sstephen hemminger /* 21125985edcSLucas De Marchi * Makes a comparison between rnd and the transition 212661b7972Sstephen hemminger * probabilities outgoing from the current state, then decides the 213661b7972Sstephen hemminger * next state and if the next packet has to be transmitted or lost. 214661b7972Sstephen hemminger * The four states correspond to: 215*a6e2fe17SYang Yingliang * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period 216*a6e2fe17SYang Yingliang * LOST_IN_BURST_PERIOD => isolated losses within a gap period 217*a6e2fe17SYang Yingliang * LOST_IN_GAP_PERIOD => lost packets within a burst period 218*a6e2fe17SYang Yingliang * TX_IN_GAP_PERIOD => successfully transmitted packets within a burst period 219661b7972Sstephen hemminger */ 220661b7972Sstephen hemminger switch (clg->state) { 221*a6e2fe17SYang Yingliang case TX_IN_GAP_PERIOD: 222661b7972Sstephen hemminger if (rnd < clg->a4) { 223*a6e2fe17SYang Yingliang clg->state = LOST_IN_BURST_PERIOD; 224661b7972Sstephen hemminger return true; 225ab6c27beSstephen hemminger } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { 226*a6e2fe17SYang Yingliang clg->state = LOST_IN_GAP_PERIOD; 227661b7972Sstephen hemminger return true; 228*a6e2fe17SYang Yingliang } else if (clg->a1 + clg->a4 < rnd) { 229*a6e2fe17SYang Yingliang clg->state = TX_IN_GAP_PERIOD; 230*a6e2fe17SYang Yingliang } 231661b7972Sstephen hemminger 232661b7972Sstephen hemminger break; 233*a6e2fe17SYang Yingliang case TX_IN_BURST_PERIOD: 234661b7972Sstephen hemminger if (rnd < clg->a5) { 235*a6e2fe17SYang Yingliang clg->state = LOST_IN_GAP_PERIOD; 236661b7972Sstephen hemminger return true; 237*a6e2fe17SYang Yingliang } else { 
238*a6e2fe17SYang Yingliang clg->state = TX_IN_BURST_PERIOD; 239*a6e2fe17SYang Yingliang } 240661b7972Sstephen hemminger 241661b7972Sstephen hemminger break; 242*a6e2fe17SYang Yingliang case LOST_IN_GAP_PERIOD: 243661b7972Sstephen hemminger if (rnd < clg->a3) 244*a6e2fe17SYang Yingliang clg->state = TX_IN_BURST_PERIOD; 245661b7972Sstephen hemminger else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { 246*a6e2fe17SYang Yingliang clg->state = TX_IN_GAP_PERIOD; 247661b7972Sstephen hemminger } else if (clg->a2 + clg->a3 < rnd) { 248*a6e2fe17SYang Yingliang clg->state = LOST_IN_GAP_PERIOD; 249661b7972Sstephen hemminger return true; 250661b7972Sstephen hemminger } 251661b7972Sstephen hemminger break; 252*a6e2fe17SYang Yingliang case LOST_IN_BURST_PERIOD: 253*a6e2fe17SYang Yingliang clg->state = TX_IN_GAP_PERIOD; 254661b7972Sstephen hemminger break; 255661b7972Sstephen hemminger } 256661b7972Sstephen hemminger 257661b7972Sstephen hemminger return false; 258661b7972Sstephen hemminger } 259661b7972Sstephen hemminger 260661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator 261661b7972Sstephen hemminger * Generates losses according to the Gilbert-Elliot loss model or 262661b7972Sstephen hemminger * its special cases (Gilbert or Simple Gilbert) 263661b7972Sstephen hemminger * 26425985edcSLucas De Marchi * Makes a comparison between random number and the transition 265661b7972Sstephen hemminger * probabilities outgoing from the current state, then decides the 26625985edcSLucas De Marchi * next state. A second random number is extracted and the comparison 267661b7972Sstephen hemminger * with the loss probability of the current state decides if the next 268661b7972Sstephen hemminger * packet will be transmitted or lost. 
269661b7972Sstephen hemminger */ 270661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q) 271661b7972Sstephen hemminger { 272661b7972Sstephen hemminger struct clgstate *clg = &q->clg; 273661b7972Sstephen hemminger 274661b7972Sstephen hemminger switch (clg->state) { 275661b7972Sstephen hemminger case 1: 27663862b5bSAruna-Hewapathirane if (prandom_u32() < clg->a1) 277661b7972Sstephen hemminger clg->state = 2; 27863862b5bSAruna-Hewapathirane if (prandom_u32() < clg->a4) 279661b7972Sstephen hemminger return true; 2807c2781faSstephen hemminger break; 281661b7972Sstephen hemminger case 2: 28263862b5bSAruna-Hewapathirane if (prandom_u32() < clg->a2) 283661b7972Sstephen hemminger clg->state = 1; 28463862b5bSAruna-Hewapathirane if (prandom_u32() > clg->a3) 285661b7972Sstephen hemminger return true; 286661b7972Sstephen hemminger } 287661b7972Sstephen hemminger 288661b7972Sstephen hemminger return false; 289661b7972Sstephen hemminger } 290661b7972Sstephen hemminger 291661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q) 292661b7972Sstephen hemminger { 293661b7972Sstephen hemminger switch (q->loss_model) { 294661b7972Sstephen hemminger case CLG_RANDOM: 295661b7972Sstephen hemminger /* Random packet drop 0 => none, ~0 => all */ 296661b7972Sstephen hemminger return q->loss && q->loss >= get_crandom(&q->loss_cor); 297661b7972Sstephen hemminger 298661b7972Sstephen hemminger case CLG_4_STATES: 299661b7972Sstephen hemminger /* 4state loss model algorithm (used also for GI model) 300661b7972Sstephen hemminger * Extracts a value from the markov 4 state loss generator, 301661b7972Sstephen hemminger * if it is 1 drops a packet and if needed writes the event in 302661b7972Sstephen hemminger * the kernel logs 303661b7972Sstephen hemminger */ 304661b7972Sstephen hemminger return loss_4state(q); 305661b7972Sstephen hemminger 306661b7972Sstephen hemminger case CLG_GILB_ELL: 307661b7972Sstephen hemminger /* Gilbert-Elliot loss model 
algorithm 308661b7972Sstephen hemminger * Extracts a value from the Gilbert-Elliot loss generator, 309661b7972Sstephen hemminger * if it is 1 drops a packet and if needed writes the event in 310661b7972Sstephen hemminger * the kernel logs 311661b7972Sstephen hemminger */ 312661b7972Sstephen hemminger return loss_gilb_ell(q); 313661b7972Sstephen hemminger } 314661b7972Sstephen hemminger 315661b7972Sstephen hemminger return false; /* not reached */ 316661b7972Sstephen hemminger } 317661b7972Sstephen hemminger 318661b7972Sstephen hemminger 3191da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and 3201da177e4SLinus Torvalds * std deviation sigma. Uses table lookup to approximate the desired 3211da177e4SLinus Torvalds * distribution, and a uniformly-distributed pseudo-random source. 3221da177e4SLinus Torvalds */ 323b407621cSStephen Hemminger static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 324b407621cSStephen Hemminger struct crndstate *state, 325b407621cSStephen Hemminger const struct disttable *dist) 3261da177e4SLinus Torvalds { 327b407621cSStephen Hemminger psched_tdiff_t x; 328b407621cSStephen Hemminger long t; 329b407621cSStephen Hemminger u32 rnd; 3301da177e4SLinus Torvalds 3311da177e4SLinus Torvalds if (sigma == 0) 3321da177e4SLinus Torvalds return mu; 3331da177e4SLinus Torvalds 3341da177e4SLinus Torvalds rnd = get_crandom(state); 3351da177e4SLinus Torvalds 3361da177e4SLinus Torvalds /* default uniform distribution */ 3371da177e4SLinus Torvalds if (dist == NULL) 3381da177e4SLinus Torvalds return (rnd % (2*sigma)) - sigma + mu; 3391da177e4SLinus Torvalds 3401da177e4SLinus Torvalds t = dist->table[rnd % dist->size]; 3411da177e4SLinus Torvalds x = (sigma % NETEM_DIST_SCALE) * t; 3421da177e4SLinus Torvalds if (x >= 0) 3431da177e4SLinus Torvalds x += NETEM_DIST_SCALE/2; 3441da177e4SLinus Torvalds else 3451da177e4SLinus Torvalds x -= NETEM_DIST_SCALE/2; 3461da177e4SLinus Torvalds 3471da177e4SLinus 
Torvalds return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 3481da177e4SLinus Torvalds } 3491da177e4SLinus Torvalds 35090b41a1cSHagen Paul Pfeifer static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) 3517bc0f28cSHagen Paul Pfeifer { 35290b41a1cSHagen Paul Pfeifer u64 ticks; 353fc33cc72SEric Dumazet 35490b41a1cSHagen Paul Pfeifer len += q->packet_overhead; 35590b41a1cSHagen Paul Pfeifer 35690b41a1cSHagen Paul Pfeifer if (q->cell_size) { 35790b41a1cSHagen Paul Pfeifer u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); 35890b41a1cSHagen Paul Pfeifer 35990b41a1cSHagen Paul Pfeifer if (len > cells * q->cell_size) /* extra cell needed for remainder */ 36090b41a1cSHagen Paul Pfeifer cells++; 36190b41a1cSHagen Paul Pfeifer len = cells * (q->cell_size + q->cell_overhead); 36290b41a1cSHagen Paul Pfeifer } 36390b41a1cSHagen Paul Pfeifer 36490b41a1cSHagen Paul Pfeifer ticks = (u64)len * NSEC_PER_SEC; 36590b41a1cSHagen Paul Pfeifer 36690b41a1cSHagen Paul Pfeifer do_div(ticks, q->rate); 367fc33cc72SEric Dumazet return PSCHED_NS2TICKS(ticks); 3687bc0f28cSHagen Paul Pfeifer } 3697bc0f28cSHagen Paul Pfeifer 370ff704050Sstephen hemminger static void tfifo_reset(struct Qdisc *sch) 371ff704050Sstephen hemminger { 372ff704050Sstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 373ff704050Sstephen hemminger struct rb_node *p; 374ff704050Sstephen hemminger 375ff704050Sstephen hemminger while ((p = rb_first(&q->t_root))) { 376ff704050Sstephen hemminger struct sk_buff *skb = netem_rb_to_skb(p); 377ff704050Sstephen hemminger 378ff704050Sstephen hemminger rb_erase(p, &q->t_root); 379ff704050Sstephen hemminger skb->next = NULL; 380ff704050Sstephen hemminger skb->prev = NULL; 381ff704050Sstephen hemminger kfree_skb(skb); 382ff704050Sstephen hemminger } 383ff704050Sstephen hemminger } 384ff704050Sstephen hemminger 385960fb66eSEric Dumazet static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 
38650612537SEric Dumazet { 387aec0a40aSEric Dumazet struct netem_sched_data *q = qdisc_priv(sch); 38850612537SEric Dumazet psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; 389aec0a40aSEric Dumazet struct rb_node **p = &q->t_root.rb_node, *parent = NULL; 39050612537SEric Dumazet 391aec0a40aSEric Dumazet while (*p) { 392aec0a40aSEric Dumazet struct sk_buff *skb; 39350612537SEric Dumazet 394aec0a40aSEric Dumazet parent = *p; 395aec0a40aSEric Dumazet skb = netem_rb_to_skb(parent); 39650612537SEric Dumazet if (tnext >= netem_skb_cb(skb)->time_to_send) 397aec0a40aSEric Dumazet p = &parent->rb_right; 398aec0a40aSEric Dumazet else 399aec0a40aSEric Dumazet p = &parent->rb_left; 40050612537SEric Dumazet } 401aec0a40aSEric Dumazet rb_link_node(netem_rb_node(nskb), parent, p); 402aec0a40aSEric Dumazet rb_insert_color(netem_rb_node(nskb), &q->t_root); 403aec0a40aSEric Dumazet sch->q.qlen++; 40450612537SEric Dumazet } 40550612537SEric Dumazet 4060afb51e7SStephen Hemminger /* 4070afb51e7SStephen Hemminger * Insert one skb into qdisc. 4080afb51e7SStephen Hemminger * Note: parent depends on return value to account for queue length. 4090afb51e7SStephen Hemminger * NET_XMIT_DROP: queue length didn't change. 4100afb51e7SStephen Hemminger * NET_XMIT_SUCCESS: one skb was queued. 
4110afb51e7SStephen Hemminger */ 4121da177e4SLinus Torvalds static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 4131da177e4SLinus Torvalds { 4141da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 41589e1df74SGuillaume Chazarain /* We don't fill cb now as skb_unshare() may invalidate it */ 41689e1df74SGuillaume Chazarain struct netem_skb_cb *cb; 4170afb51e7SStephen Hemminger struct sk_buff *skb2; 4180afb51e7SStephen Hemminger int count = 1; 4191da177e4SLinus Torvalds 4200afb51e7SStephen Hemminger /* Random duplication */ 4210afb51e7SStephen Hemminger if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 4220afb51e7SStephen Hemminger ++count; 4230afb51e7SStephen Hemminger 424661b7972Sstephen hemminger /* Drop packet? */ 425e4ae004bSEric Dumazet if (loss_event(q)) { 426e4ae004bSEric Dumazet if (q->ecn && INET_ECN_set_ce(skb)) 427e4ae004bSEric Dumazet sch->qstats.drops++; /* mark packet */ 428e4ae004bSEric Dumazet else 4290afb51e7SStephen Hemminger --count; 430e4ae004bSEric Dumazet } 4310afb51e7SStephen Hemminger if (count == 0) { 4321da177e4SLinus Torvalds sch->qstats.drops++; 4331da177e4SLinus Torvalds kfree_skb(skb); 434c27f339aSJarek Poplawski return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 4351da177e4SLinus Torvalds } 4361da177e4SLinus Torvalds 4375a308f40SEric Dumazet /* If a delay is expected, orphan the skb. (orphaning usually takes 4385a308f40SEric Dumazet * place at TX completion time, so _before_ the link transit delay) 4395a308f40SEric Dumazet */ 4405a308f40SEric Dumazet if (q->latency || q->jitter) 441f2f872f9SEric Dumazet skb_orphan_partial(skb); 4424e8a5201SDavid S. Miller 4430afb51e7SStephen Hemminger /* 4440afb51e7SStephen Hemminger * If we need to duplicate packet, then re-insert at top of the 4450afb51e7SStephen Hemminger * qdisc tree, since parent queuer expects that only one 4460afb51e7SStephen Hemminger * skb will be queued. 
447d5d75cd6SStephen Hemminger */ 4480afb51e7SStephen Hemminger if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 4497698b4fcSDavid S. Miller struct Qdisc *rootq = qdisc_root(sch); 4500afb51e7SStephen Hemminger u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ 4510afb51e7SStephen Hemminger q->duplicate = 0; 452d5d75cd6SStephen Hemminger 4535f86173bSJussi Kivilinna qdisc_enqueue_root(skb2, rootq); 4540afb51e7SStephen Hemminger q->duplicate = dupsave; 4551da177e4SLinus Torvalds } 4561da177e4SLinus Torvalds 457c865e5d9SStephen Hemminger /* 458c865e5d9SStephen Hemminger * Randomized packet corruption. 459c865e5d9SStephen Hemminger * Make copy if needed since we are modifying 460c865e5d9SStephen Hemminger * If packet is going to be hardware checksummed, then 461c865e5d9SStephen Hemminger * do it now in software before we mangle it. 462c865e5d9SStephen Hemminger */ 463c865e5d9SStephen Hemminger if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 464f64f9e71SJoe Perches if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || 465f64f9e71SJoe Perches (skb->ip_summed == CHECKSUM_PARTIAL && 466116a0fc3SEric Dumazet skb_checksum_help(skb))) 467116a0fc3SEric Dumazet return qdisc_drop(skb, sch); 468c865e5d9SStephen Hemminger 46963862b5bSAruna-Hewapathirane skb->data[prandom_u32() % skb_headlen(skb)] ^= 47063862b5bSAruna-Hewapathirane 1<<(prandom_u32() % 8); 471c865e5d9SStephen Hemminger } 472c865e5d9SStephen Hemminger 473960fb66eSEric Dumazet if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) 474960fb66eSEric Dumazet return qdisc_reshape_fail(skb, sch); 475960fb66eSEric Dumazet 476960fb66eSEric Dumazet sch->qstats.backlog += qdisc_pkt_len(skb); 477960fb66eSEric Dumazet 4785f86173bSJussi Kivilinna cb = netem_skb_cb(skb); 479f64f9e71SJoe Perches if (q->gap == 0 || /* not doing reordering */ 480a42b4799SVijay Subramanian q->counter < q->gap - 1 || /* inside last reordering gap */ 481f64f9e71SJoe Perches q->reorder < get_crandom(&q->reorder_cor)) { 
4820f9f32acSStephen Hemminger psched_time_t now; 48307aaa115SStephen Hemminger psched_tdiff_t delay; 48407aaa115SStephen Hemminger 48507aaa115SStephen Hemminger delay = tabledist(q->latency, q->jitter, 48607aaa115SStephen Hemminger &q->delay_cor, q->delay_dist); 48707aaa115SStephen Hemminger 4883bebcda2SPatrick McHardy now = psched_get_time(); 4897bc0f28cSHagen Paul Pfeifer 4907bc0f28cSHagen Paul Pfeifer if (q->rate) { 491aec0a40aSEric Dumazet struct sk_buff *last; 4927bc0f28cSHagen Paul Pfeifer 493aec0a40aSEric Dumazet if (!skb_queue_empty(&sch->q)) 494aec0a40aSEric Dumazet last = skb_peek_tail(&sch->q); 495aec0a40aSEric Dumazet else 496aec0a40aSEric Dumazet last = netem_rb_to_skb(rb_last(&q->t_root)); 497aec0a40aSEric Dumazet if (last) { 4987bc0f28cSHagen Paul Pfeifer /* 499a13d3104SJohannes Naab * Last packet in queue is reference point (now), 500a13d3104SJohannes Naab * calculate this time bonus and subtract 5017bc0f28cSHagen Paul Pfeifer * from delay. 5027bc0f28cSHagen Paul Pfeifer */ 503aec0a40aSEric Dumazet delay -= netem_skb_cb(last)->time_to_send - now; 504a13d3104SJohannes Naab delay = max_t(psched_tdiff_t, 0, delay); 505aec0a40aSEric Dumazet now = netem_skb_cb(last)->time_to_send; 5067bc0f28cSHagen Paul Pfeifer } 507a13d3104SJohannes Naab 5088cfd88d6SYang Yingliang delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); 5097bc0f28cSHagen Paul Pfeifer } 5107bc0f28cSHagen Paul Pfeifer 5117c59e25fSPatrick McHardy cb->time_to_send = now + delay; 512aec0a40aSEric Dumazet cb->tstamp_save = skb->tstamp; 5131da177e4SLinus Torvalds ++q->counter; 514960fb66eSEric Dumazet tfifo_enqueue(skb, sch); 5151da177e4SLinus Torvalds } else { 5160dca51d3SStephen Hemminger /* 5170dca51d3SStephen Hemminger * Do re-ordering by putting one out of N packets at the front 5180dca51d3SStephen Hemminger * of the queue. 
5190dca51d3SStephen Hemminger */ 5203bebcda2SPatrick McHardy cb->time_to_send = psched_get_time(); 5210dca51d3SStephen Hemminger q->counter = 0; 5228ba25dadSJarek Poplawski 52350612537SEric Dumazet __skb_queue_head(&sch->q, skb); 524eb101924SHagen Paul Pfeifer sch->qstats.requeues++; 525378a2f09SJarek Poplawski } 5261da177e4SLinus Torvalds 52710f6dfcfSstephen hemminger return NET_XMIT_SUCCESS; 5281da177e4SLinus Torvalds } 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds static unsigned int netem_drop(struct Qdisc *sch) 5311da177e4SLinus Torvalds { 5321da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 53350612537SEric Dumazet unsigned int len; 5341da177e4SLinus Torvalds 53550612537SEric Dumazet len = qdisc_queue_drop(sch); 536aec0a40aSEric Dumazet 537aec0a40aSEric Dumazet if (!len) { 538aec0a40aSEric Dumazet struct rb_node *p = rb_first(&q->t_root); 539aec0a40aSEric Dumazet 540aec0a40aSEric Dumazet if (p) { 541aec0a40aSEric Dumazet struct sk_buff *skb = netem_rb_to_skb(p); 542aec0a40aSEric Dumazet 543aec0a40aSEric Dumazet rb_erase(p, &q->t_root); 544aec0a40aSEric Dumazet sch->q.qlen--; 545aec0a40aSEric Dumazet skb->next = NULL; 546aec0a40aSEric Dumazet skb->prev = NULL; 547aec0a40aSEric Dumazet len = qdisc_pkt_len(skb); 548638a52b8Sstephen hemminger sch->qstats.backlog -= len; 549aec0a40aSEric Dumazet kfree_skb(skb); 550aec0a40aSEric Dumazet } 551aec0a40aSEric Dumazet } 55250612537SEric Dumazet if (!len && q->qdisc && q->qdisc->ops->drop) 55350612537SEric Dumazet len = q->qdisc->ops->drop(q->qdisc); 55450612537SEric Dumazet if (len) 5551da177e4SLinus Torvalds sch->qstats.drops++; 55650612537SEric Dumazet 5571da177e4SLinus Torvalds return len; 5581da177e4SLinus Torvalds } 5591da177e4SLinus Torvalds 5601da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch) 5611da177e4SLinus Torvalds { 5621da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 5631da177e4SLinus Torvalds struct sk_buff *skb; 564aec0a40aSEric 
Dumazet struct rb_node *p; 5651da177e4SLinus Torvalds 566fd245a4aSEric Dumazet if (qdisc_is_throttled(sch)) 56711274e5aSStephen Hemminger return NULL; 56811274e5aSStephen Hemminger 56950612537SEric Dumazet tfifo_dequeue: 570aec0a40aSEric Dumazet skb = __skb_dequeue(&sch->q); 571771018e7SStephen Hemminger if (skb) { 572aec0a40aSEric Dumazet deliver: 573aec0a40aSEric Dumazet sch->qstats.backlog -= qdisc_pkt_len(skb); 574aec0a40aSEric Dumazet qdisc_unthrottled(sch); 575aec0a40aSEric Dumazet qdisc_bstats_update(sch, skb); 576aec0a40aSEric Dumazet return skb; 577aec0a40aSEric Dumazet } 578aec0a40aSEric Dumazet p = rb_first(&q->t_root); 579aec0a40aSEric Dumazet if (p) { 58036b7bfe0SEric Dumazet psched_time_t time_to_send; 58136b7bfe0SEric Dumazet 582aec0a40aSEric Dumazet skb = netem_rb_to_skb(p); 5830f9f32acSStephen Hemminger 5840f9f32acSStephen Hemminger /* if more time remaining? */ 58536b7bfe0SEric Dumazet time_to_send = netem_skb_cb(skb)->time_to_send; 58636b7bfe0SEric Dumazet if (time_to_send <= psched_get_time()) { 587aec0a40aSEric Dumazet rb_erase(p, &q->t_root); 588aec0a40aSEric Dumazet 589aec0a40aSEric Dumazet sch->q.qlen--; 590aec0a40aSEric Dumazet skb->next = NULL; 591aec0a40aSEric Dumazet skb->prev = NULL; 592aec0a40aSEric Dumazet skb->tstamp = netem_skb_cb(skb)->tstamp_save; 59303c05f0dSJarek Poplawski 5948caf1539SJarek Poplawski #ifdef CONFIG_NET_CLS_ACT 5958caf1539SJarek Poplawski /* 5968caf1539SJarek Poplawski * If it's at ingress let's pretend the delay is 5978caf1539SJarek Poplawski * from the network (tstamp will be updated). 
5988caf1539SJarek Poplawski */ 5998caf1539SJarek Poplawski if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 6008caf1539SJarek Poplawski skb->tstamp.tv64 = 0; 6018caf1539SJarek Poplawski #endif 60210f6dfcfSstephen hemminger 60350612537SEric Dumazet if (q->qdisc) { 60450612537SEric Dumazet int err = qdisc_enqueue(skb, q->qdisc); 60550612537SEric Dumazet 60650612537SEric Dumazet if (unlikely(err != NET_XMIT_SUCCESS)) { 60750612537SEric Dumazet if (net_xmit_drop_count(err)) { 60850612537SEric Dumazet sch->qstats.drops++; 60950612537SEric Dumazet qdisc_tree_decrease_qlen(sch, 1); 61050612537SEric Dumazet } 61150612537SEric Dumazet } 61250612537SEric Dumazet goto tfifo_dequeue; 61350612537SEric Dumazet } 614aec0a40aSEric Dumazet goto deliver; 61511274e5aSStephen Hemminger } 61607aaa115SStephen Hemminger 61750612537SEric Dumazet if (q->qdisc) { 61850612537SEric Dumazet skb = q->qdisc->ops->dequeue(q->qdisc); 61950612537SEric Dumazet if (skb) 62050612537SEric Dumazet goto deliver; 62150612537SEric Dumazet } 62236b7bfe0SEric Dumazet qdisc_watchdog_schedule(&q->watchdog, time_to_send); 6230f9f32acSStephen Hemminger } 6240f9f32acSStephen Hemminger 62550612537SEric Dumazet if (q->qdisc) { 62650612537SEric Dumazet skb = q->qdisc->ops->dequeue(q->qdisc); 62750612537SEric Dumazet if (skb) 62850612537SEric Dumazet goto deliver; 62950612537SEric Dumazet } 6300f9f32acSStephen Hemminger return NULL; 6311da177e4SLinus Torvalds } 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch) 6341da177e4SLinus Torvalds { 6351da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6361da177e4SLinus Torvalds 63750612537SEric Dumazet qdisc_reset_queue(sch); 638ff704050Sstephen hemminger tfifo_reset(sch); 63950612537SEric Dumazet if (q->qdisc) 6401da177e4SLinus Torvalds qdisc_reset(q->qdisc); 64159cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 6421da177e4SLinus Torvalds } 6431da177e4SLinus Torvalds 6446373a9a2Sstephen hemminger static 
void dist_free(struct disttable *d) 6456373a9a2Sstephen hemminger { 6466373a9a2Sstephen hemminger if (d) { 6476373a9a2Sstephen hemminger if (is_vmalloc_addr(d)) 6486373a9a2Sstephen hemminger vfree(d); 6496373a9a2Sstephen hemminger else 6506373a9a2Sstephen hemminger kfree(d); 6516373a9a2Sstephen hemminger } 6526373a9a2Sstephen hemminger } 6536373a9a2Sstephen hemminger 6541da177e4SLinus Torvalds /* 6551da177e4SLinus Torvalds * Distribution data is a variable size payload containing 6561da177e4SLinus Torvalds * signed 16 bit values. 6571da177e4SLinus Torvalds */ 6581e90474cSPatrick McHardy static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 6591da177e4SLinus Torvalds { 6601da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6616373a9a2Sstephen hemminger size_t n = nla_len(attr)/sizeof(__s16); 6621e90474cSPatrick McHardy const __s16 *data = nla_data(attr); 6637698b4fcSDavid S. Miller spinlock_t *root_lock; 6641da177e4SLinus Torvalds struct disttable *d; 6651da177e4SLinus Torvalds int i; 6666373a9a2Sstephen hemminger size_t s; 6671da177e4SLinus Torvalds 668df173bdaSstephen hemminger if (n > NETEM_DIST_MAX) 6691da177e4SLinus Torvalds return -EINVAL; 6701da177e4SLinus Torvalds 6716373a9a2Sstephen hemminger s = sizeof(struct disttable) + n * sizeof(s16); 672bb52c7acSEric Dumazet d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN); 6736373a9a2Sstephen hemminger if (!d) 6746373a9a2Sstephen hemminger d = vmalloc(s); 6751da177e4SLinus Torvalds if (!d) 6761da177e4SLinus Torvalds return -ENOMEM; 6771da177e4SLinus Torvalds 6781da177e4SLinus Torvalds d->size = n; 6791da177e4SLinus Torvalds for (i = 0; i < n; i++) 6801da177e4SLinus Torvalds d->table[i] = data[i]; 6811da177e4SLinus Torvalds 682102396aeSJarek Poplawski root_lock = qdisc_root_sleeping_lock(sch); 6837698b4fcSDavid S. Miller 6847698b4fcSDavid S. Miller spin_lock_bh(root_lock); 685bb52c7acSEric Dumazet swap(q->delay_dist, d); 6867698b4fcSDavid S. 
Miller spin_unlock_bh(root_lock); 687bb52c7acSEric Dumazet 688bb52c7acSEric Dumazet dist_free(d); 6891da177e4SLinus Torvalds return 0; 6901da177e4SLinus Torvalds } 6911da177e4SLinus Torvalds 692265eb67fSStephen Hemminger static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) 6931da177e4SLinus Torvalds { 6941da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 6951e90474cSPatrick McHardy const struct tc_netem_corr *c = nla_data(attr); 6961da177e4SLinus Torvalds 6971da177e4SLinus Torvalds init_crandom(&q->delay_cor, c->delay_corr); 6981da177e4SLinus Torvalds init_crandom(&q->loss_cor, c->loss_corr); 6991da177e4SLinus Torvalds init_crandom(&q->dup_cor, c->dup_corr); 7001da177e4SLinus Torvalds } 7011da177e4SLinus Torvalds 702265eb67fSStephen Hemminger static void get_reorder(struct Qdisc *sch, const struct nlattr *attr) 7030dca51d3SStephen Hemminger { 7040dca51d3SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 7051e90474cSPatrick McHardy const struct tc_netem_reorder *r = nla_data(attr); 7060dca51d3SStephen Hemminger 7070dca51d3SStephen Hemminger q->reorder = r->probability; 7080dca51d3SStephen Hemminger init_crandom(&q->reorder_cor, r->correlation); 7090dca51d3SStephen Hemminger } 7100dca51d3SStephen Hemminger 711265eb67fSStephen Hemminger static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) 712c865e5d9SStephen Hemminger { 713c865e5d9SStephen Hemminger struct netem_sched_data *q = qdisc_priv(sch); 7141e90474cSPatrick McHardy const struct tc_netem_corrupt *r = nla_data(attr); 715c865e5d9SStephen Hemminger 716c865e5d9SStephen Hemminger q->corrupt = r->probability; 717c865e5d9SStephen Hemminger init_crandom(&q->corrupt_cor, r->correlation); 718c865e5d9SStephen Hemminger } 719c865e5d9SStephen Hemminger 7207bc0f28cSHagen Paul Pfeifer static void get_rate(struct Qdisc *sch, const struct nlattr *attr) 7217bc0f28cSHagen Paul Pfeifer { 7227bc0f28cSHagen Paul Pfeifer struct netem_sched_data *q = 
qdisc_priv(sch); 7237bc0f28cSHagen Paul Pfeifer const struct tc_netem_rate *r = nla_data(attr); 7247bc0f28cSHagen Paul Pfeifer 7257bc0f28cSHagen Paul Pfeifer q->rate = r->rate; 72690b41a1cSHagen Paul Pfeifer q->packet_overhead = r->packet_overhead; 72790b41a1cSHagen Paul Pfeifer q->cell_size = r->cell_size; 72890b41a1cSHagen Paul Pfeifer if (q->cell_size) 72990b41a1cSHagen Paul Pfeifer q->cell_size_reciprocal = reciprocal_value(q->cell_size); 73090b41a1cSHagen Paul Pfeifer q->cell_overhead = r->cell_overhead; 7317bc0f28cSHagen Paul Pfeifer } 7327bc0f28cSHagen Paul Pfeifer 733661b7972Sstephen hemminger static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) 734661b7972Sstephen hemminger { 735661b7972Sstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 736661b7972Sstephen hemminger const struct nlattr *la; 737661b7972Sstephen hemminger int rem; 738661b7972Sstephen hemminger 739661b7972Sstephen hemminger nla_for_each_nested(la, attr, rem) { 740661b7972Sstephen hemminger u16 type = nla_type(la); 741661b7972Sstephen hemminger 742661b7972Sstephen hemminger switch (type) { 743661b7972Sstephen hemminger case NETEM_LOSS_GI: { 744661b7972Sstephen hemminger const struct tc_netem_gimodel *gi = nla_data(la); 745661b7972Sstephen hemminger 7462494654dSstephen hemminger if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { 747661b7972Sstephen hemminger pr_info("netem: incorrect gi model size\n"); 748661b7972Sstephen hemminger return -EINVAL; 749661b7972Sstephen hemminger } 750661b7972Sstephen hemminger 751661b7972Sstephen hemminger q->loss_model = CLG_4_STATES; 752661b7972Sstephen hemminger 753661b7972Sstephen hemminger q->clg.state = 1; 754661b7972Sstephen hemminger q->clg.a1 = gi->p13; 755661b7972Sstephen hemminger q->clg.a2 = gi->p31; 756661b7972Sstephen hemminger q->clg.a3 = gi->p32; 757661b7972Sstephen hemminger q->clg.a4 = gi->p14; 758661b7972Sstephen hemminger q->clg.a5 = gi->p23; 759661b7972Sstephen hemminger break; 760661b7972Sstephen hemminger } 
761661b7972Sstephen hemminger 762661b7972Sstephen hemminger case NETEM_LOSS_GE: { 763661b7972Sstephen hemminger const struct tc_netem_gemodel *ge = nla_data(la); 764661b7972Sstephen hemminger 7652494654dSstephen hemminger if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { 7662494654dSstephen hemminger pr_info("netem: incorrect ge model size\n"); 767661b7972Sstephen hemminger return -EINVAL; 768661b7972Sstephen hemminger } 769661b7972Sstephen hemminger 770661b7972Sstephen hemminger q->loss_model = CLG_GILB_ELL; 771661b7972Sstephen hemminger q->clg.state = 1; 772661b7972Sstephen hemminger q->clg.a1 = ge->p; 773661b7972Sstephen hemminger q->clg.a2 = ge->r; 774661b7972Sstephen hemminger q->clg.a3 = ge->h; 775661b7972Sstephen hemminger q->clg.a4 = ge->k1; 776661b7972Sstephen hemminger break; 777661b7972Sstephen hemminger } 778661b7972Sstephen hemminger 779661b7972Sstephen hemminger default: 780661b7972Sstephen hemminger pr_info("netem: unknown loss type %u\n", type); 781661b7972Sstephen hemminger return -EINVAL; 782661b7972Sstephen hemminger } 783661b7972Sstephen hemminger } 784661b7972Sstephen hemminger 785661b7972Sstephen hemminger return 0; 786661b7972Sstephen hemminger } 787661b7972Sstephen hemminger 78827a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 78927a3421eSPatrick McHardy [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 79027a3421eSPatrick McHardy [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 79127a3421eSPatrick McHardy [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 7927bc0f28cSHagen Paul Pfeifer [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, 793661b7972Sstephen hemminger [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 794e4ae004bSEric Dumazet [TCA_NETEM_ECN] = { .type = NLA_U32 }, 7956a031f67SYang Yingliang [TCA_NETEM_RATE64] = { .type = NLA_U64 }, 79627a3421eSPatrick McHardy }; 79727a3421eSPatrick McHardy 7982c10b32bSThomas Graf static int 
parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 7992c10b32bSThomas Graf const struct nla_policy *policy, int len) 8002c10b32bSThomas Graf { 8012c10b32bSThomas Graf int nested_len = nla_len(nla) - NLA_ALIGN(len); 8022c10b32bSThomas Graf 803661b7972Sstephen hemminger if (nested_len < 0) { 804661b7972Sstephen hemminger pr_info("netem: invalid attributes len %d\n", nested_len); 8052c10b32bSThomas Graf return -EINVAL; 806661b7972Sstephen hemminger } 807661b7972Sstephen hemminger 8082c10b32bSThomas Graf if (nested_len >= nla_attr_size(0)) 8092c10b32bSThomas Graf return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 8102c10b32bSThomas Graf nested_len, policy); 811661b7972Sstephen hemminger 8122c10b32bSThomas Graf memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 8132c10b32bSThomas Graf return 0; 8142c10b32bSThomas Graf } 8152c10b32bSThomas Graf 816c865e5d9SStephen Hemminger /* Parse netlink message to set options */ 8171e90474cSPatrick McHardy static int netem_change(struct Qdisc *sch, struct nlattr *opt) 8181da177e4SLinus Torvalds { 8191da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 820b03f4672SPatrick McHardy struct nlattr *tb[TCA_NETEM_MAX + 1]; 8211da177e4SLinus Torvalds struct tc_netem_qopt *qopt; 8221da177e4SLinus Torvalds int ret; 8231da177e4SLinus Torvalds 824b03f4672SPatrick McHardy if (opt == NULL) 8251da177e4SLinus Torvalds return -EINVAL; 8261da177e4SLinus Torvalds 8272c10b32bSThomas Graf qopt = nla_data(opt); 8282c10b32bSThomas Graf ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); 829b03f4672SPatrick McHardy if (ret < 0) 830b03f4672SPatrick McHardy return ret; 831b03f4672SPatrick McHardy 83250612537SEric Dumazet sch->limit = qopt->limit; 8331da177e4SLinus Torvalds 8341da177e4SLinus Torvalds q->latency = qopt->latency; 8351da177e4SLinus Torvalds q->jitter = qopt->jitter; 8361da177e4SLinus Torvalds q->limit = qopt->limit; 8371da177e4SLinus Torvalds q->gap = qopt->gap; 8380dca51d3SStephen 
Hemminger q->counter = 0; 8391da177e4SLinus Torvalds q->loss = qopt->loss; 8401da177e4SLinus Torvalds q->duplicate = qopt->duplicate; 8411da177e4SLinus Torvalds 842bb2f8cc0SStephen Hemminger /* for compatibility with earlier versions. 843bb2f8cc0SStephen Hemminger * if gap is set, need to assume 100% probability 8440dca51d3SStephen Hemminger */ 845a362e0a7SStephen Hemminger if (q->gap) 8460dca51d3SStephen Hemminger q->reorder = ~0; 8470dca51d3SStephen Hemminger 848265eb67fSStephen Hemminger if (tb[TCA_NETEM_CORR]) 849265eb67fSStephen Hemminger get_correlation(sch, tb[TCA_NETEM_CORR]); 8501da177e4SLinus Torvalds 8511e90474cSPatrick McHardy if (tb[TCA_NETEM_DELAY_DIST]) { 8521e90474cSPatrick McHardy ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); 8531da177e4SLinus Torvalds if (ret) 8541da177e4SLinus Torvalds return ret; 8551da177e4SLinus Torvalds } 856c865e5d9SStephen Hemminger 857265eb67fSStephen Hemminger if (tb[TCA_NETEM_REORDER]) 858265eb67fSStephen Hemminger get_reorder(sch, tb[TCA_NETEM_REORDER]); 8591da177e4SLinus Torvalds 860265eb67fSStephen Hemminger if (tb[TCA_NETEM_CORRUPT]) 861265eb67fSStephen Hemminger get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 8621da177e4SLinus Torvalds 8637bc0f28cSHagen Paul Pfeifer if (tb[TCA_NETEM_RATE]) 8647bc0f28cSHagen Paul Pfeifer get_rate(sch, tb[TCA_NETEM_RATE]); 8657bc0f28cSHagen Paul Pfeifer 8666a031f67SYang Yingliang if (tb[TCA_NETEM_RATE64]) 8676a031f67SYang Yingliang q->rate = max_t(u64, q->rate, 8686a031f67SYang Yingliang nla_get_u64(tb[TCA_NETEM_RATE64])); 8696a031f67SYang Yingliang 870e4ae004bSEric Dumazet if (tb[TCA_NETEM_ECN]) 871e4ae004bSEric Dumazet q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); 872e4ae004bSEric Dumazet 873661b7972Sstephen hemminger q->loss_model = CLG_RANDOM; 874661b7972Sstephen hemminger if (tb[TCA_NETEM_LOSS]) 875661b7972Sstephen hemminger ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); 876661b7972Sstephen hemminger 877661b7972Sstephen hemminger return ret; 8781da177e4SLinus Torvalds } 
8791da177e4SLinus Torvalds 8801e90474cSPatrick McHardy static int netem_init(struct Qdisc *sch, struct nlattr *opt) 8811da177e4SLinus Torvalds { 8821da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 8831da177e4SLinus Torvalds int ret; 8841da177e4SLinus Torvalds 8851da177e4SLinus Torvalds if (!opt) 8861da177e4SLinus Torvalds return -EINVAL; 8871da177e4SLinus Torvalds 88859cb5c67SPatrick McHardy qdisc_watchdog_init(&q->watchdog, sch); 8891da177e4SLinus Torvalds 890661b7972Sstephen hemminger q->loss_model = CLG_RANDOM; 8911da177e4SLinus Torvalds ret = netem_change(sch, opt); 89250612537SEric Dumazet if (ret) 893250a65f7Sstephen hemminger pr_info("netem: change failed\n"); 8941da177e4SLinus Torvalds return ret; 8951da177e4SLinus Torvalds } 8961da177e4SLinus Torvalds 8971da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch) 8981da177e4SLinus Torvalds { 8991da177e4SLinus Torvalds struct netem_sched_data *q = qdisc_priv(sch); 9001da177e4SLinus Torvalds 90159cb5c67SPatrick McHardy qdisc_watchdog_cancel(&q->watchdog); 90250612537SEric Dumazet if (q->qdisc) 9031da177e4SLinus Torvalds qdisc_destroy(q->qdisc); 9046373a9a2Sstephen hemminger dist_free(q->delay_dist); 9051da177e4SLinus Torvalds } 9061da177e4SLinus Torvalds 907661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q, 908661b7972Sstephen hemminger struct sk_buff *skb) 909661b7972Sstephen hemminger { 910661b7972Sstephen hemminger struct nlattr *nest; 911661b7972Sstephen hemminger 912661b7972Sstephen hemminger nest = nla_nest_start(skb, TCA_NETEM_LOSS); 913661b7972Sstephen hemminger if (nest == NULL) 914661b7972Sstephen hemminger goto nla_put_failure; 915661b7972Sstephen hemminger 916661b7972Sstephen hemminger switch (q->loss_model) { 917661b7972Sstephen hemminger case CLG_RANDOM: 918661b7972Sstephen hemminger /* legacy loss model */ 919661b7972Sstephen hemminger nla_nest_cancel(skb, nest); 920661b7972Sstephen hemminger return 0; /* no data */ 
921661b7972Sstephen hemminger 922661b7972Sstephen hemminger case CLG_4_STATES: { 923661b7972Sstephen hemminger struct tc_netem_gimodel gi = { 924661b7972Sstephen hemminger .p13 = q->clg.a1, 925661b7972Sstephen hemminger .p31 = q->clg.a2, 926661b7972Sstephen hemminger .p32 = q->clg.a3, 927661b7972Sstephen hemminger .p14 = q->clg.a4, 928661b7972Sstephen hemminger .p23 = q->clg.a5, 929661b7972Sstephen hemminger }; 930661b7972Sstephen hemminger 9311b34ec43SDavid S. Miller if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi)) 9321b34ec43SDavid S. Miller goto nla_put_failure; 933661b7972Sstephen hemminger break; 934661b7972Sstephen hemminger } 935661b7972Sstephen hemminger case CLG_GILB_ELL: { 936661b7972Sstephen hemminger struct tc_netem_gemodel ge = { 937661b7972Sstephen hemminger .p = q->clg.a1, 938661b7972Sstephen hemminger .r = q->clg.a2, 939661b7972Sstephen hemminger .h = q->clg.a3, 940661b7972Sstephen hemminger .k1 = q->clg.a4, 941661b7972Sstephen hemminger }; 942661b7972Sstephen hemminger 9431b34ec43SDavid S. Miller if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge)) 9441b34ec43SDavid S. 
Miller goto nla_put_failure; 945661b7972Sstephen hemminger break; 946661b7972Sstephen hemminger } 947661b7972Sstephen hemminger } 948661b7972Sstephen hemminger 949661b7972Sstephen hemminger nla_nest_end(skb, nest); 950661b7972Sstephen hemminger return 0; 951661b7972Sstephen hemminger 952661b7972Sstephen hemminger nla_put_failure: 953661b7972Sstephen hemminger nla_nest_cancel(skb, nest); 954661b7972Sstephen hemminger return -1; 955661b7972Sstephen hemminger } 956661b7972Sstephen hemminger 9571da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 9581da177e4SLinus Torvalds { 9591da177e4SLinus Torvalds const struct netem_sched_data *q = qdisc_priv(sch); 960861d7f74Sstephen hemminger struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); 9611da177e4SLinus Torvalds struct tc_netem_qopt qopt; 9621da177e4SLinus Torvalds struct tc_netem_corr cor; 9630dca51d3SStephen Hemminger struct tc_netem_reorder reorder; 964c865e5d9SStephen Hemminger struct tc_netem_corrupt corrupt; 9657bc0f28cSHagen Paul Pfeifer struct tc_netem_rate rate; 9661da177e4SLinus Torvalds 9671da177e4SLinus Torvalds qopt.latency = q->latency; 9681da177e4SLinus Torvalds qopt.jitter = q->jitter; 9691da177e4SLinus Torvalds qopt.limit = q->limit; 9701da177e4SLinus Torvalds qopt.loss = q->loss; 9711da177e4SLinus Torvalds qopt.gap = q->gap; 9721da177e4SLinus Torvalds qopt.duplicate = q->duplicate; 9731b34ec43SDavid S. Miller if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 9741b34ec43SDavid S. Miller goto nla_put_failure; 9751da177e4SLinus Torvalds 9761da177e4SLinus Torvalds cor.delay_corr = q->delay_cor.rho; 9771da177e4SLinus Torvalds cor.loss_corr = q->loss_cor.rho; 9781da177e4SLinus Torvalds cor.dup_corr = q->dup_cor.rho; 9791b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor)) 9801b34ec43SDavid S. 
Miller goto nla_put_failure; 9810dca51d3SStephen Hemminger 9820dca51d3SStephen Hemminger reorder.probability = q->reorder; 9830dca51d3SStephen Hemminger reorder.correlation = q->reorder_cor.rho; 9841b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder)) 9851b34ec43SDavid S. Miller goto nla_put_failure; 9860dca51d3SStephen Hemminger 987c865e5d9SStephen Hemminger corrupt.probability = q->corrupt; 988c865e5d9SStephen Hemminger corrupt.correlation = q->corrupt_cor.rho; 9891b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) 9901b34ec43SDavid S. Miller goto nla_put_failure; 991c865e5d9SStephen Hemminger 9926a031f67SYang Yingliang if (q->rate >= (1ULL << 32)) { 9936a031f67SYang Yingliang if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate)) 9946a031f67SYang Yingliang goto nla_put_failure; 9956a031f67SYang Yingliang rate.rate = ~0U; 9966a031f67SYang Yingliang } else { 9977bc0f28cSHagen Paul Pfeifer rate.rate = q->rate; 9986a031f67SYang Yingliang } 99990b41a1cSHagen Paul Pfeifer rate.packet_overhead = q->packet_overhead; 100090b41a1cSHagen Paul Pfeifer rate.cell_size = q->cell_size; 100190b41a1cSHagen Paul Pfeifer rate.cell_overhead = q->cell_overhead; 10021b34ec43SDavid S. Miller if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate)) 10031b34ec43SDavid S. 
Miller goto nla_put_failure; 10047bc0f28cSHagen Paul Pfeifer 1005e4ae004bSEric Dumazet if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn)) 1006e4ae004bSEric Dumazet goto nla_put_failure; 1007e4ae004bSEric Dumazet 1008661b7972Sstephen hemminger if (dump_loss_model(q, skb) != 0) 1009661b7972Sstephen hemminger goto nla_put_failure; 1010661b7972Sstephen hemminger 1011861d7f74Sstephen hemminger return nla_nest_end(skb, nla); 10121da177e4SLinus Torvalds 10131e90474cSPatrick McHardy nla_put_failure: 1014861d7f74Sstephen hemminger nlmsg_trim(skb, nla); 10151da177e4SLinus Torvalds return -1; 10161da177e4SLinus Torvalds } 10171da177e4SLinus Torvalds 101810f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 101910f6dfcfSstephen hemminger struct sk_buff *skb, struct tcmsg *tcm) 102010f6dfcfSstephen hemminger { 102110f6dfcfSstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 102210f6dfcfSstephen hemminger 102350612537SEric Dumazet if (cl != 1 || !q->qdisc) /* only one class */ 102410f6dfcfSstephen hemminger return -ENOENT; 102510f6dfcfSstephen hemminger 102610f6dfcfSstephen hemminger tcm->tcm_handle |= TC_H_MIN(1); 102710f6dfcfSstephen hemminger tcm->tcm_info = q->qdisc->handle; 102810f6dfcfSstephen hemminger 102910f6dfcfSstephen hemminger return 0; 103010f6dfcfSstephen hemminger } 103110f6dfcfSstephen hemminger 103210f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 103310f6dfcfSstephen hemminger struct Qdisc **old) 103410f6dfcfSstephen hemminger { 103510f6dfcfSstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 103610f6dfcfSstephen hemminger 103710f6dfcfSstephen hemminger sch_tree_lock(sch); 103810f6dfcfSstephen hemminger *old = q->qdisc; 103910f6dfcfSstephen hemminger q->qdisc = new; 104050612537SEric Dumazet if (*old) { 104110f6dfcfSstephen hemminger qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 104210f6dfcfSstephen hemminger qdisc_reset(*old); 
104350612537SEric Dumazet } 104410f6dfcfSstephen hemminger sch_tree_unlock(sch); 104510f6dfcfSstephen hemminger 104610f6dfcfSstephen hemminger return 0; 104710f6dfcfSstephen hemminger } 104810f6dfcfSstephen hemminger 104910f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 105010f6dfcfSstephen hemminger { 105110f6dfcfSstephen hemminger struct netem_sched_data *q = qdisc_priv(sch); 105210f6dfcfSstephen hemminger return q->qdisc; 105310f6dfcfSstephen hemminger } 105410f6dfcfSstephen hemminger 105510f6dfcfSstephen hemminger static unsigned long netem_get(struct Qdisc *sch, u32 classid) 105610f6dfcfSstephen hemminger { 105710f6dfcfSstephen hemminger return 1; 105810f6dfcfSstephen hemminger } 105910f6dfcfSstephen hemminger 106010f6dfcfSstephen hemminger static void netem_put(struct Qdisc *sch, unsigned long arg) 106110f6dfcfSstephen hemminger { 106210f6dfcfSstephen hemminger } 106310f6dfcfSstephen hemminger 106410f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 106510f6dfcfSstephen hemminger { 106610f6dfcfSstephen hemminger if (!walker->stop) { 106710f6dfcfSstephen hemminger if (walker->count >= walker->skip) 106810f6dfcfSstephen hemminger if (walker->fn(sch, 1, walker) < 0) { 106910f6dfcfSstephen hemminger walker->stop = 1; 107010f6dfcfSstephen hemminger return; 107110f6dfcfSstephen hemminger } 107210f6dfcfSstephen hemminger walker->count++; 107310f6dfcfSstephen hemminger } 107410f6dfcfSstephen hemminger } 107510f6dfcfSstephen hemminger 107610f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = { 107710f6dfcfSstephen hemminger .graft = netem_graft, 107810f6dfcfSstephen hemminger .leaf = netem_leaf, 107910f6dfcfSstephen hemminger .get = netem_get, 108010f6dfcfSstephen hemminger .put = netem_put, 108110f6dfcfSstephen hemminger .walk = netem_walk, 108210f6dfcfSstephen hemminger .dump = netem_dump_class, 108310f6dfcfSstephen hemminger }; 108410f6dfcfSstephen 
hemminger 108520fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 10861da177e4SLinus Torvalds .id = "netem", 108710f6dfcfSstephen hemminger .cl_ops = &netem_class_ops, 10881da177e4SLinus Torvalds .priv_size = sizeof(struct netem_sched_data), 10891da177e4SLinus Torvalds .enqueue = netem_enqueue, 10901da177e4SLinus Torvalds .dequeue = netem_dequeue, 109177be155cSJarek Poplawski .peek = qdisc_peek_dequeued, 10921da177e4SLinus Torvalds .drop = netem_drop, 10931da177e4SLinus Torvalds .init = netem_init, 10941da177e4SLinus Torvalds .reset = netem_reset, 10951da177e4SLinus Torvalds .destroy = netem_destroy, 10961da177e4SLinus Torvalds .change = netem_change, 10971da177e4SLinus Torvalds .dump = netem_dump, 10981da177e4SLinus Torvalds .owner = THIS_MODULE, 10991da177e4SLinus Torvalds }; 11001da177e4SLinus Torvalds 11011da177e4SLinus Torvalds 11021da177e4SLinus Torvalds static int __init netem_module_init(void) 11031da177e4SLinus Torvalds { 1104eb229c4cSStephen Hemminger pr_info("netem: version " VERSION "\n"); 11051da177e4SLinus Torvalds return register_qdisc(&netem_qdisc_ops); 11061da177e4SLinus Torvalds } 11071da177e4SLinus Torvalds static void __exit netem_module_exit(void) 11081da177e4SLinus Torvalds { 11091da177e4SLinus Torvalds unregister_qdisc(&netem_qdisc_ops); 11101da177e4SLinus Torvalds } 11111da177e4SLinus Torvalds module_init(netem_module_init) 11121da177e4SLinus Torvalds module_exit(netem_module_exit) 11131da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1114