/*
 *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
 *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
 *
 *  An implementation of the DCCP protocol
 *
 *  This code has been developed by the University of Waikato WAND
 *  research group. For further information please see http://www.wand.net.nz/
 *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
 *
 *  This code also uses code from Lulea University, rereleased as GPL by its
 *  authors:
 *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
 *
 *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
 *  and to make it work as a loadable module in the DCCP stack written by
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
 *
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/string.h>
#include <linux/slab.h>
#include "packet_history.h"
#include "../../dccp.h"

/**
 * tfrc_tx_hist_entry - Simple singly-linked TX history list
 * @next:  next oldest entry (LIFO order)
 * @seqno: sequence number of this entry
 * @stamp: send time of packet with sequence number @seqno
 */
struct tfrc_tx_hist_entry {
        struct tfrc_tx_hist_entry *next;
        u64                       seqno;
        ktime_t                   stamp;
};

/*
 *      Transmitter History Routines
 */
static struct kmem_cache *tfrc_tx_hist_slab;

int __init tfrc_tx_packet_history_init(void)
{
        tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
                                              sizeof(struct tfrc_tx_hist_entry),
                                              0, SLAB_HWCACHE_ALIGN, NULL);
        return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
}

void tfrc_tx_packet_history_exit(void)
{
        if (tfrc_tx_hist_slab != NULL) {
                kmem_cache_destroy(tfrc_tx_hist_slab);
                tfrc_tx_hist_slab = NULL;
        }
}

static struct tfrc_tx_hist_entry *
        tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
{
        while (head != NULL && head->seqno != seqno)
                head = head->next;

        return head;
}

int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
{
        struct tfrc_tx_hist_entry *entry;

        entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
        if (entry == NULL)
                return -ENOBUFS;
        entry->seqno = seqno;
        entry->stamp = ktime_get_real();
        entry->next  = *headp;
        *headp       = entry;
        return 0;
}

void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
{
        struct tfrc_tx_hist_entry *head = *headp;

        while (head != NULL) {
                struct tfrc_tx_hist_entry *next = head->next;

                kmem_cache_free(tfrc_tx_hist_slab, head);
                head = next;
        }

        *headp = NULL;
}

u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
                     const ktime_t now)
{
        u32 rtt = 0;
        struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);

        if (packet != NULL) {
                rtt = ktime_us_delta(now, packet->stamp);
                /*
                 * Garbage-collect older (irrelevant) entries:
                 */
                tfrc_tx_hist_purge(&packet->next);
        }

        return rtt;
}
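
/*
 * Illustrative sketch (added; not part of the original file) of how a CCID
 * sender might drive the TX history above. The variable names and sequence
 * numbers are hypothetical, and error handling is elided:
 *
 *      struct tfrc_tx_hist_entry *hist = NULL;
 *
 *      tfrc_tx_hist_add(&hist, 100);           // packet 100 sent
 *      tfrc_tx_hist_add(&hist, 101);           // packet 101 sent (new head)
 *
 *      // On receiving an ACK for 100, sample the RTT; as a side effect,
 *      // every entry older than 100 is freed:
 *      u32 rtt = tfrc_tx_hist_rtt(hist, 100, ktime_get_real());
 *
 *      tfrc_tx_hist_purge(&hist);              // teardown frees all entries
 */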

/*
 *      Receiver History Routines
 */
static struct kmem_cache *tfrc_rx_hist_slab;

int __init tfrc_rx_packet_history_init(void)
{
        tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
                                              sizeof(struct tfrc_rx_hist_entry),
                                              0, SLAB_HWCACHE_ALIGN, NULL);
        return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
}

void tfrc_rx_packet_history_exit(void)
{
        if (tfrc_rx_hist_slab != NULL) {
                kmem_cache_destroy(tfrc_rx_hist_slab);
                tfrc_rx_hist_slab = NULL;
        }
}

static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
                                               const struct sk_buff *skb,
                                               const u64 ndp)
{
        const struct dccp_hdr *dh = dccp_hdr(skb);

        entry->tfrchrx_seqno  = DCCP_SKB_CB(skb)->dccpd_seq;
        entry->tfrchrx_ccval  = dh->dccph_ccval;
        entry->tfrchrx_type   = dh->dccph_type;
        entry->tfrchrx_ndp    = ndp;
        entry->tfrchrx_tstamp = ktime_get_real();
}

void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
                             const struct sk_buff *skb,
                             const u64 ndp)
{
        struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);

        tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
}

/* has the packet contained in skb been seen before? */
int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
{
        const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
        int i;

        if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
                return 1;

        for (i = 1; i <= h->loss_count; i++)
                if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
                        return 1;

        return 0;
}

static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
        const u8 idx_a = tfrc_rx_hist_index(h, a),
                 idx_b = tfrc_rx_hist_index(h, b);
        struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];

        h->ring[idx_a] = h->ring[idx_b];
        h->ring[idx_b] = tmp;
}
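
/*
 * Worked example (added for illustration): the RX history is a ring of
 * TFRC_NDUPACK + 1 = 4 entry pointers. Assuming the usual index helper in
 * packet_history.h, logical entry i (0 <= i <= loss_count) maps to physical
 * slot
 *
 *      tfrc_rx_hist_index(h, i) == (h->loss_start + i) & TFRC_NDUPACK
 *
 * i.e. (loss_start + i) modulo 4. Entry 0 ("loss_prev", the last packet
 * received before the current hole) can thus be relocated simply by moving
 * loss_start, without copying any entries. For example, with loss_start == 2
 * and loss_count == 2, the logical entries S0, S1, S2 live in physical slots
 * 2, 3 and 0; tfrc_rx_hist_swap() above exchanges two ring pointers
 * addressed by such logical indices.
 */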

/*
 *      Private helper functions for loss detection.
 *
 *      In the descriptions, `Si' refers to the sequence number of entry number i,
 *      whose NDP count is `Ni' (lower case is used for variables).
 *      Note: All __xxx_loss functions expect that a test against duplicates has been
 *            performed already: the seqno of the skb must not be less than the seqno
 *            of loss_prev; and it must not equal that of any valid history entry.
 */
static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
{
        u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
            s1 = DCCP_SKB_CB(skb)->dccpd_seq;

        if (!dccp_loss_free(s0, s1, n1)) {      /* gap between S0 and S1 */
                h->loss_count = 1;
                tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
        }
}

static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n2)
{
        u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
            s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
            s2 = DCCP_SKB_CB(skb)->dccpd_seq;

        if (likely(dccp_delta_seqno(s1, s2) > 0)) {     /* S1  <  S2 */
                h->loss_count = 2;
                tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
                return;
        }

        /* S0  <  S2  <  S1 */

        if (dccp_loss_free(s0, s2, n2)) {
                u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;

                if (dccp_loss_free(s2, s1, n1)) {
                        /* hole is filled: S0, S2, and S1 are consecutive */
                        h->loss_count = 0;
                        h->loss_start = tfrc_rx_hist_index(h, 1);
                } else
                        /* gap between S2 and S1: just update loss_prev */
                        tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);

        } else {        /* gap between S0 and S2 */
                /*
                 * Reorder history to insert S2 between S0 and S1
                 */
                tfrc_rx_hist_swap(h, 0, 3);
                h->loss_start = tfrc_rx_hist_index(h, 3);
                tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
                h->loss_count = 2;
        }
}
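
/*
 * Worked example for __one_after_loss() (added; sequence numbers are
 * hypothetical and all NDP counts are taken as 0, i.e. no non-data packets
 * in between): with a loss pending as S0 = 10, S1 = 12, the hole is packet
 * 11. If S2 = 11 arrives, dccp_loss_free(10, 11, 0) and
 * dccp_loss_free(11, 12, 0) both hold, so the hole is filled: loss_start
 * advances to make S1 the new loss_prev and loss_count resets to 0.
 * If instead the pending state were S0 = 10, S1 = 13 and S2 = 11 arrived,
 * only the first test would hold (12 is still missing), so just loss_prev
 * is advanced from 10 to 11 and loss_count stays at 1.
 */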

/* return 1 if a new loss event has been identified */
static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n3)
{
        u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
            s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
            s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
            s3 = DCCP_SKB_CB(skb)->dccpd_seq;

        if (likely(dccp_delta_seqno(s2, s3) > 0)) {     /* S2  <  S3 */
                h->loss_count = 3;
                tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
                return 1;
        }

        /* S3  <  S2 */

        if (dccp_delta_seqno(s1, s3) > 0) {             /* S1  <  S3  <  S2 */
                /*
                 * Reorder history to insert S3 between S1 and S2
                 */
                tfrc_rx_hist_swap(h, 2, 3);
                tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
                h->loss_count = 3;
                return 1;
        }

        /* S0  <  S3  <  S1 */

        if (dccp_loss_free(s0, s3, n3)) {
                u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;

                if (dccp_loss_free(s3, s1, n1)) {
                        /* hole between S0 and S1 filled by S3 */
                        u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;

                        if (dccp_loss_free(s1, s2, n2)) {
                                /* entire hole filled by S0, S3, S1, S2 */
                                h->loss_start = tfrc_rx_hist_index(h, 2);
                                h->loss_count = 0;
                        } else {
                                /* gap remains between S1 and S2 */
                                h->loss_start = tfrc_rx_hist_index(h, 1);
                                h->loss_count = 1;
                        }

                } else  /* gap exists between S3 and S1, loss_count stays at 2 */
                        tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);

                return 0;
        }

        /*
         * The remaining case:  S0 < S3 < S1 < S2;  gap between S0 and S3.
         * Reorder history to insert S3 between S0 and S1.
         */
        tfrc_rx_hist_swap(h, 0, 3);
        h->loss_start = tfrc_rx_hist_index(h, 3);
        tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
        h->loss_count = 3;

        return 1;
}
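
/*
 * Worked example for __two_after_loss() (added; hypothetical numbers, NDP
 * counts taken as 0): with S0 = 10, S1 = 12, S2 = 15 pending, the missing
 * packets are 11, 13 and 14. If S3 = 11 arrives, dccp_loss_free(10, 11, 0)
 * and dccp_loss_free(11, 12, 0) both hold but dccp_loss_free(12, 15, 0)
 * does not: the first hole is filled, loss_start advances so that S1 = 12
 * becomes the new loss_prev, and loss_count drops to 1 (the gap before 15
 * is still being tracked). If instead S3 = 16 arrives, S2 < S3 confirms the
 * loss event and the function returns 1.
 */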

/* recycle RX history records to continue loss detection if necessary */
static void __three_after_loss(struct tfrc_rx_hist *h)
{
        /*
         * At this stage we know already that there is a gap between S0 and S1
         * (since S0 was the highest sequence number received before detecting
         * the loss). To recycle the loss record, it is thus only necessary to
         * check for other possible gaps between S1/S2 and between S2/S3.
         */
        u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
            s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
            s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno;
        u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
            n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;

        if (dccp_loss_free(s1, s2, n2)) {

                if (dccp_loss_free(s2, s3, n3)) {
                        /* no gap between S2 and S3: entire hole is filled */
                        h->loss_start = tfrc_rx_hist_index(h, 3);
                        h->loss_count = 0;
                } else {
                        /* gap between S2 and S3 */
                        h->loss_start = tfrc_rx_hist_index(h, 2);
                        h->loss_count = 1;
                }

        } else {        /* gap between S1 and S2 */
                h->loss_start = tfrc_rx_hist_index(h, 1);
                h->loss_count = 2;
        }
}

/**
 * tfrc_rx_handle_loss - Loss detection and further processing
 * @h:             The non-empty RX history object
 * @lh:            Loss Intervals database to update
 * @skb:           Currently received packet
 * @ndp:           The NDP count belonging to @skb
 * @calc_first_li: Caller-dependent computation of first loss interval in @lh
 * @sk:            Used by @calc_first_li (see tfrc_lh_interval_add)
 *
 * Chooses action according to pending loss, updates LI database when a new
 * loss was detected, and does required post-processing. Returns 1 when caller
 * should send feedback, 0 otherwise.
 * Since it also takes care of reordering during loss detection and updates the
 * records accordingly, the caller should not perform any more RX history
 * operations when loss_count is greater than 0 after calling this function.
 */
int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
                        struct tfrc_loss_hist *lh,
                        struct sk_buff *skb, const u64 ndp,
                        u32 (*calc_first_li)(struct sock *), struct sock *sk)
{
        int is_new_loss = 0;

        if (h->loss_count == 0) {
                __do_track_loss(h, skb, ndp);
        } else if (h->loss_count == 1) {
                __one_after_loss(h, skb, ndp);
        } else if (h->loss_count != 2) {
                DCCP_BUG("invalid loss_count %d", h->loss_count);
        } else if (__two_after_loss(h, skb, ndp)) {
                /*
                 * Update Loss Interval database and recycle RX records
                 */
                is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
                __three_after_loss(h);
        }
        return is_new_loss;
}

int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
{
        int i;

        for (i = 0; i <= TFRC_NDUPACK; i++) {
                h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
                if (h->ring[i] == NULL)
                        goto out_free;
        }

        h->loss_count = h->loss_start = 0;
        return 0;

out_free:
        while (i-- != 0) {
                kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
                h->ring[i] = NULL;
        }
        return -ENOBUFS;
}

void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
{
        int i;

        for (i = 0; i <= TFRC_NDUPACK; ++i)
                if (h->ring[i] != NULL) {
                        kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
                        h->ring[i] = NULL;
                }
}

/**
 * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
 * @h: The RX history object
 */
static inline struct tfrc_rx_hist_entry *
                        tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
{
        return h->ring[0];
}

/**
 * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry
 * @h: The RX history object
 */
static inline struct tfrc_rx_hist_entry *
                        tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
{
        return h->ring[h->rtt_sample_prev];
}
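
/*
 * Background note (added for illustration): RFC 4342, 8.1 specifies that the
 * sender increments the 4-bit CCVal window counter every quarter RTT, so two
 * packets whose CCVal values differ by delta (mod 16) were sent roughly
 * delta/4 RTTs apart. Inverting this gives the estimate used below:
 *
 *      RTT  ~=  (4 / delta) * (t_newer - t_older)
 *
 * A delta of 4 is thus the optimal match (the elapsed time spans exactly one
 * RTT); deltas of 1 to 3 are usable but coarser, and anything else is
 * rejected. Note that the integer division 4 / delta loses precision for
 * delta == 3.
 */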

/**
 * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
 * @h:   The RX history object
 * @skb: Currently received packet
 *
 * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
 * to compute a sample with given data - calling function should check this.
 */
u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
{
        u32 sample = 0,
            delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
                            tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);

        if (delta_v < 1 || delta_v > 4) {       /* unsuitable CCVal delta */

                if (h->rtt_sample_prev == 2) {  /* previous candidate stored */

                        sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
                                       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
                        if (sample)
                                sample = 4 / sample *
                                        ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
                                                       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
                        else    /*
                                 * FIXME: This condition is in principle not
                                 * possible but occurs when CCID is used for
                                 * two-way data traffic. I have tried to trace
                                 * it, but the cause does not seem to be here.
                                 */
                                DCCP_BUG("please report to dccp@vger.kernel.org"
                                         " => prev = %u, last = %u",
                                         tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
                                         tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
                } else if (delta_v < 1) {
                        h->rtt_sample_prev = 1;
                        goto keep_ref_for_next_time;
                }

        } else if (delta_v == 4)                /* optimal match */
                sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
        else {                                  /* suboptimal match */
                h->rtt_sample_prev = 2;
                goto keep_ref_for_next_time;
        }

        if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
                DCCP_WARN("RTT sample %u too large, using max\n", sample);
                sample = DCCP_SANE_RTT_MAX;
        }

        h->rtt_sample_prev = 0;         /* use current entry as next reference */
keep_ref_for_next_time:

        return sample;
}
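
/*
 * Worked example (added for illustration): CCVal is a 4-bit counter, so the
 * deltas above are computed modulo 16, assuming the usual definition of
 * SUB16() in dccp.h, SUB16(a, b) == (a + 16 - b) & 0xF. For instance, if the
 * reference entry carried CCVal 14 and the current packet carries CCVal 2,
 * then SUB16(2, 14) == 4: the counter wrapped, yet the two packets are still
 * exactly four quarter-RTT window increments (one full RTT) apart, which is
 * the optimal-match case above.
 */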