1 /* 2 * TCP Illinois congestion control. 3 * Home page: 4 * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html 5 * 6 * The algorithm is described in: 7 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm 8 * for High-Speed Networks" 9 * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf 10 * 11 * Implemented from description in paper and ns-2 simulation. 12 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org> 13 */ 14 15 #include <linux/module.h> 16 #include <linux/skbuff.h> 17 #include <linux/inet_diag.h> 18 #include <asm/div64.h> 19 #include <net/tcp.h> 20 21 #define ALPHA_SHIFT 7 22 #define ALPHA_SCALE (1u<<ALPHA_SHIFT) 23 #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ 24 #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ 25 #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ 26 #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ 27 28 #define BETA_SHIFT 6 29 #define BETA_SCALE (1u<<BETA_SHIFT) 30 #define BETA_MIN (BETA_SCALE/8) /* 0.125 */ 31 #define BETA_MAX (BETA_SCALE/2) /* 0.5 */ 32 #define BETA_BASE BETA_MAX 33 34 static int win_thresh __read_mostly = 15; 35 module_param(win_thresh, int, 0); 36 MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); 37 38 static int theta __read_mostly = 5; 39 module_param(theta, int, 0); 40 MODULE_PARM_DESC(theta, "# of fast RTT's before full growth"); 41 42 /* TCP Illinois Parameters */ 43 struct illinois { 44 u64 sum_rtt; /* sum of rtt's measured within last rtt */ 45 u16 cnt_rtt; /* # of rtts measured within last rtt */ 46 u32 base_rtt; /* min of all rtt in usec */ 47 u32 max_rtt; /* max of all rtt in usec */ 48 u32 end_seq; /* right edge of current RTT */ 49 u32 alpha; /* Additive increase */ 50 u32 beta; /* Muliplicative decrease */ 51 u16 acked; /* # packets acked by current ACK */ 52 u8 rtt_above; /* average rtt has gone above threshold */ 53 u8 rtt_low; /* # of rtts measurements below threshold */ 54 }; 55 56 static void rtt_reset(struct sock *sk) 57 { 58 struct tcp_sock *tp = tcp_sk(sk); 59 struct illinois *ca = inet_csk_ca(sk); 60 61 ca->end_seq = tp->snd_nxt; 62 ca->cnt_rtt = 0; 63 ca->sum_rtt = 0; 64 65 /* TODO: age max_rtt? */ 66 } 67 68 static void tcp_illinois_init(struct sock *sk) 69 { 70 struct illinois *ca = inet_csk_ca(sk); 71 72 ca->alpha = ALPHA_MAX; 73 ca->beta = BETA_BASE; 74 ca->base_rtt = 0x7fffffff; 75 ca->max_rtt = 0; 76 77 ca->acked = 0; 78 ca->rtt_low = 0; 79 ca->rtt_above = 0; 80 81 rtt_reset(sk); 82 } 83 84 /* Measure RTT for each ack. */ 85 static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample) 86 { 87 struct illinois *ca = inet_csk_ca(sk); 88 s32 rtt_us = sample->rtt_us; 89 90 ca->acked = sample->pkts_acked; 91 92 /* dup ack, no rtt sample */ 93 if (rtt_us < 0) 94 return; 95 96 /* ignore bogus values, this prevents wraparound in alpha math */ 97 if (rtt_us > RTT_MAX) 98 rtt_us = RTT_MAX; 99 100 /* keep track of minimum RTT seen so far */ 101 if (ca->base_rtt > rtt_us) 102 ca->base_rtt = rtt_us; 103 104 /* and max */ 105 if (ca->max_rtt < rtt_us) 106 ca->max_rtt = rtt_us; 107 108 ++ca->cnt_rtt; 109 ca->sum_rtt += rtt_us; 110 } 111 112 /* Maximum queuing delay */ 113 static inline u32 max_delay(const struct illinois *ca) 114 { 115 return ca->max_rtt - ca->base_rtt; 116 } 117 118 /* Average queuing delay */ 119 static inline u32 avg_delay(const struct illinois *ca) 120 { 121 u64 t = ca->sum_rtt; 122 123 do_div(t, ca->cnt_rtt); 124 return t - ca->base_rtt; 125 } 126 127 /* 128 * Compute value of alpha used for additive increase. 129 * If small window then use 1.0, equivalent to Reno. 130 * 131 * For larger windows, adjust based on average delay. 132 * A. If average delay is at minimum (we are uncongested), 133 * then use large alpha (10.0) to increase faster. 134 * B. If average delay is at maximum (getting congested) 135 * then use small alpha (0.3) 136 * 137 * The result is a convex window growth curve. 138 */ 139 static u32 alpha(struct illinois *ca, u32 da, u32 dm) 140 { 141 u32 d1 = dm / 100; /* Low threshold */ 142 143 if (da <= d1) { 144 /* If never got out of low delay zone, then use max */ 145 if (!ca->rtt_above) 146 return ALPHA_MAX; 147 148 /* Wait for 5 good RTT's before allowing alpha to go alpha max. 149 * This prevents one good RTT from causing sudden window increase. 150 */ 151 if (++ca->rtt_low < theta) 152 return ca->alpha; 153 154 ca->rtt_low = 0; 155 ca->rtt_above = 0; 156 return ALPHA_MAX; 157 } 158 159 ca->rtt_above = 1; 160 161 /* 162 * Based on: 163 * 164 * (dm - d1) amin amax 165 * k1 = ------------------- 166 * amax - amin 167 * 168 * (dm - d1) amin 169 * k2 = ---------------- - d1 170 * amax - amin 171 * 172 * k1 173 * alpha = ---------- 174 * k2 + da 175 */ 176 177 dm -= d1; 178 da -= d1; 179 return (dm * ALPHA_MAX) / 180 (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); 181 } 182 183 /* 184 * Beta used for multiplicative decrease. 185 * For small window sizes returns same value as Reno (0.5) 186 * 187 * If delay is small (10% of max) then beta = 1/8 188 * If delay is up to 80% of max then beta = 1/2 189 * In between is a linear function 190 */ 191 static u32 beta(u32 da, u32 dm) 192 { 193 u32 d2, d3; 194 195 d2 = dm / 10; 196 if (da <= d2) 197 return BETA_MIN; 198 199 d3 = (8 * dm) / 10; 200 if (da >= d3 || d3 <= d2) 201 return BETA_MAX; 202 203 /* 204 * Based on: 205 * 206 * bmin d3 - bmax d2 207 * k3 = ------------------- 208 * d3 - d2 209 * 210 * bmax - bmin 211 * k4 = ------------- 212 * d3 - d2 213 * 214 * b = k3 + k4 da 215 */ 216 return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da) 217 / (d3 - d2); 218 } 219 220 /* Update alpha and beta values once per RTT */ 221 static void update_params(struct sock *sk) 222 { 223 struct tcp_sock *tp = tcp_sk(sk); 224 struct illinois *ca = inet_csk_ca(sk); 225 226 if (tp->snd_cwnd < win_thresh) { 227 ca->alpha = ALPHA_BASE; 228 ca->beta = BETA_BASE; 229 } else if (ca->cnt_rtt > 0) { 230 u32 dm = max_delay(ca); 231 u32 da = avg_delay(ca); 232 233 ca->alpha = alpha(ca, da, dm); 234 ca->beta = beta(da, dm); 235 } 236 237 rtt_reset(sk); 238 } 239 240 /* 241 * In case of loss, reset to default values 242 */ 243 static void tcp_illinois_state(struct sock *sk, u8 new_state) 244 { 245 struct illinois *ca = inet_csk_ca(sk); 246 247 if (new_state == TCP_CA_Loss) { 248 ca->alpha = ALPHA_BASE; 249 ca->beta = BETA_BASE; 250 ca->rtt_low = 0; 251 ca->rtt_above = 0; 252 rtt_reset(sk); 253 } 254 } 255 256 /* 257 * Increase window in response to successful acknowledgment. 258 */ 259 static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) 260 { 261 struct tcp_sock *tp = tcp_sk(sk); 262 struct illinois *ca = inet_csk_ca(sk); 263 264 if (after(ack, ca->end_seq)) 265 update_params(sk); 266 267 /* RFC2861 only increase cwnd if fully utilized */ 268 if (!tcp_is_cwnd_limited(sk)) 269 return; 270 271 /* In slow start */ 272 if (tcp_in_slow_start(tp)) 273 tcp_slow_start(tp, acked); 274 275 else { 276 u32 delta; 277 278 /* snd_cwnd_cnt is # of packets since last cwnd increment */ 279 tp->snd_cwnd_cnt += ca->acked; 280 ca->acked = 1; 281 282 /* This is close approximation of: 283 * tp->snd_cwnd += alpha/tp->snd_cwnd 284 */ 285 delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; 286 if (delta >= tp->snd_cwnd) { 287 tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, 288 (u32)tp->snd_cwnd_clamp); 289 tp->snd_cwnd_cnt = 0; 290 } 291 } 292 } 293 294 static u32 tcp_illinois_ssthresh(struct sock *sk) 295 { 296 struct tcp_sock *tp = tcp_sk(sk); 297 struct illinois *ca = inet_csk_ca(sk); 298 299 /* Multiplicative decrease */ 300 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); 301 } 302 303 /* Extract info for Tcp socket info provided via netlink. */ 304 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, 305 union tcp_cc_info *info) 306 { 307 const struct illinois *ca = inet_csk_ca(sk); 308 309 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 310 info->vegas.tcpv_enabled = 1; 311 info->vegas.tcpv_rttcnt = ca->cnt_rtt; 312 info->vegas.tcpv_minrtt = ca->base_rtt; 313 info->vegas.tcpv_rtt = 0; 314 315 if (info->vegas.tcpv_rttcnt > 0) { 316 u64 t = ca->sum_rtt; 317 318 do_div(t, info->vegas.tcpv_rttcnt); 319 info->vegas.tcpv_rtt = t; 320 } 321 *attr = INET_DIAG_VEGASINFO; 322 return sizeof(struct tcpvegas_info); 323 } 324 return 0; 325 } 326 327 static struct tcp_congestion_ops tcp_illinois __read_mostly = { 328 .init = tcp_illinois_init, 329 .ssthresh = tcp_illinois_ssthresh, 330 .undo_cwnd = tcp_reno_undo_cwnd, 331 .cong_avoid = tcp_illinois_cong_avoid, 332 .set_state = tcp_illinois_state, 333 .get_info = tcp_illinois_info, 334 .pkts_acked = tcp_illinois_acked, 335 336 .owner = THIS_MODULE, 337 .name = "illinois", 338 }; 339 340 static int __init tcp_illinois_register(void) 341 { 342 BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE); 343 return tcp_register_congestion_control(&tcp_illinois); 344 } 345 346 static void __exit tcp_illinois_unregister(void) 347 { 348 tcp_unregister_congestion_control(&tcp_illinois); 349 } 350 351 module_init(tcp_illinois_register); 352 module_exit(tcp_illinois_unregister); 353 354 MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); 355 MODULE_LICENSE("GPL"); 356 MODULE_DESCRIPTION("TCP Illinois"); 357 MODULE_VERSION("1.0"); 358