#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/tcp.h>
#include <linux/hash.h>

#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/dst.h>
#include <net/tcp.h>

int sysctl_tcp_nometrics_save __read_mostly;

enum tcp_metric_index {
	TCP_METRIC_RTT,
	TCP_METRIC_RTTVAR,
	TCP_METRIC_SSTHRESH,
	TCP_METRIC_CWND,
	TCP_METRIC_REORDERING,

	/* Always last. */
	TCP_METRIC_MAX,
};

struct tcp_fastopen_metrics {
	u16	mss;
	u16	syn_loss:10;		/* Recurring Fast Open SYN losses */
	unsigned long	last_syn_loss;	/* Last Fast Open SYN loss */
	struct	tcp_fastopen_cookie	cookie;
};

struct tcp_metrics_block {
	struct tcp_metrics_block __rcu	*tcpm_next;
	struct inetpeer_addr		tcpm_addr;
	unsigned long			tcpm_stamp;
	u32				tcpm_ts;
	u32				tcpm_ts_stamp;
	u32				tcpm_lock;
	u32				tcpm_vals[TCP_METRIC_MAX];
	struct tcp_fastopen_metrics	tcpm_fastopen;
};

static bool tcp_metric_locked(struct tcp_metrics_block *tm,
			      enum tcp_metric_index idx)
{
	return tm->tcpm_lock & (1 << idx);
}

static u32 tcp_metric_get(struct tcp_metrics_block *tm,
			  enum tcp_metric_index idx)
{
	return tm->tcpm_vals[idx];
}

static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
				  enum tcp_metric_index idx)
{
	return msecs_to_jiffies(tm->tcpm_vals[idx]);
}

static void tcp_metric_set(struct tcp_metrics_block *tm,
			   enum tcp_metric_index idx,
			   u32 val)
{
	tm->tcpm_vals[idx] = val;
}

static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
				 enum tcp_metric_index idx,
				 u32 val)
{
	tm->tcpm_vals[idx] = jiffies_to_msecs(val);
}

static bool addr_same(const struct inetpeer_addr *a,
		      const struct inetpeer_addr *b)
{
	const struct in6_addr *a6, *b6;

	if (a->family != b->family)
		return false;
	if (a->family == AF_INET)
		return a->addr.a4 == b->addr.a4;

	a6 = (const struct in6_addr *) &a->addr.a6[0];
	b6 = (const struct in6_addr *) &b->addr.a6[0];

	return ipv6_addr_equal(a6, b6);
}

struct tcpm_hash_bucket {
	struct tcp_metrics_block __rcu	*chain;
};

static DEFINE_SPINLOCK(tcp_metrics_lock);

static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
	u32 val;

	tm->tcpm_stamp = jiffies;

	val = 0;
	if (dst_metric_locked(dst, RTAX_RTT))
		val |= 1 << TCP_METRIC_RTT;
	if (dst_metric_locked(dst, RTAX_RTTVAR))
		val |= 1 << TCP_METRIC_RTTVAR;
	if (dst_metric_locked(dst, RTAX_SSTHRESH))
		val |= 1 << TCP_METRIC_SSTHRESH;
	if (dst_metric_locked(dst, RTAX_CWND))
		val |= 1 << TCP_METRIC_CWND;
	if (dst_metric_locked(dst, RTAX_REORDERING))
		val |= 1 << TCP_METRIC_REORDERING;
	tm->tcpm_lock = val;

	tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
	tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
	tm->tcpm_ts = 0;
	tm->tcpm_ts_stamp = 0;
	tm->tcpm_fastopen.mss = 0;
	tm->tcpm_fastopen.syn_loss = 0;
	tm->tcpm_fastopen.cookie.len = 0;
}
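
/* Usage sketch for the accessors above (illustrative only; srtt_jiffies
 * is a hypothetical local variable, not part of this file): tcpm_vals[]
 * holds the raw dst metric values, which the jiffies/msecs helpers
 * treat as milliseconds, and tcpm_lock is a bitmap of metrics locked in
 * the route that therefore must not be overwritten:
 *
 *	if (!tcp_metric_locked(tm, TCP_METRIC_RTT))
 *		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, srtt_jiffies);
 *	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
 *
 * tcpm_suck_dst() seeds a block from those route metrics and clears the
 * cached timestamp and Fast Open state.
 */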

static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
					  struct inetpeer_addr *addr,
					  unsigned int hash,
					  bool reclaim)
{
	struct tcp_metrics_block *tm;
	struct net *net;

	spin_lock_bh(&tcp_metrics_lock);
	net = dev_net(dst->dev);
	if (unlikely(reclaim)) {
		struct tcp_metrics_block *oldest;

		oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain);
		for (tm = rcu_dereference(oldest->tcpm_next); tm;
		     tm = rcu_dereference(tm->tcpm_next)) {
			if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
				oldest = tm;
		}
		tm = oldest;
	} else {
		tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
		if (!tm)
			goto out_unlock;
	}
	tm->tcpm_addr = *addr;

	tcpm_suck_dst(tm, dst);

	if (likely(!reclaim)) {
		tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
		rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm);
	}

out_unlock:
	spin_unlock_bh(&tcp_metrics_lock);
	return tm;
}

#define TCP_METRICS_TIMEOUT	(60 * 60 * HZ)

static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
	if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
		tcpm_suck_dst(tm, dst);
}

#define TCP_METRICS_RECLAIM_DEPTH	5
#define TCP_METRICS_RECLAIM_PTR		(struct tcp_metrics_block *) 0x1UL

static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth)
{
	if (tm)
		return tm;
	if (depth > TCP_METRICS_RECLAIM_DEPTH)
		return TCP_METRICS_RECLAIM_PTR;
	return NULL;
}

static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr,
						   struct net *net, unsigned int hash)
{
	struct tcp_metrics_block *tm;
	int depth = 0;

	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
	     tm = rcu_dereference(tm->tcpm_next)) {
		if (addr_same(&tm->tcpm_addr, addr))
			break;
		depth++;
	}
	return tcp_get_encode(tm, depth);
}

static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
						       struct dst_entry *dst)
{
	struct tcp_metrics_block *tm;
	struct inetpeer_addr addr;
	unsigned int hash;
	struct net *net;

	addr.family = req->rsk_ops->family;
	switch (addr.family) {
	case AF_INET:
		addr.addr.a4 = inet_rsk(req)->rmt_addr;
		hash = (__force unsigned int) addr.addr.a4;
		break;
	case AF_INET6:
		*(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr;
		hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr);
		break;
	default:
		return NULL;
	}

	net = dev_net(dst->dev);
	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);

	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
	     tm = rcu_dereference(tm->tcpm_next)) {
		if (addr_same(&tm->tcpm_addr, &addr))
			break;
	}
	tcpm_check_stamp(tm, dst);
	return tm;
}
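
/* Lookup/allocation policy, summarized (descriptive note): peers are
 * hashed on their destination address into net->ipv4.tcp_metrics_hash
 * and each bucket chain is walked under RCU.  When __tcp_get_metrics()
 * finds no match after more than TCP_METRICS_RECLAIM_DEPTH (5) entries,
 * it returns the TCP_METRICS_RECLAIM_PTR sentinel so the caller can ask
 * tcpm_new() to recycle the least recently stamped block in that chain
 * (reclaim == true) rather than let the chain grow without bound.
 * tcpm_check_stamp() re-seeds a block from the route once it is older
 * than TCP_METRICS_TIMEOUT (one hour).
 */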

static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
{
	struct inet6_timewait_sock *tw6;
	struct tcp_metrics_block *tm;
	struct inetpeer_addr addr;
	unsigned int hash;
	struct net *net;

	addr.family = tw->tw_family;
	switch (addr.family) {
	case AF_INET:
		addr.addr.a4 = tw->tw_daddr;
		hash = (__force unsigned int) addr.addr.a4;
		break;
	case AF_INET6:
		tw6 = inet6_twsk((struct sock *)tw);
		*(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
		hash = ipv6_addr_hash(&tw6->tw_v6_daddr);
		break;
	default:
		return NULL;
	}

	net = twsk_net(tw);
	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);

	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
	     tm = rcu_dereference(tm->tcpm_next)) {
		if (addr_same(&tm->tcpm_addr, &addr))
			break;
	}
	return tm;
}

static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
						 struct dst_entry *dst,
						 bool create)
{
	struct tcp_metrics_block *tm;
	struct inetpeer_addr addr;
	unsigned int hash;
	struct net *net;
	bool reclaim;

	addr.family = sk->sk_family;
	switch (addr.family) {
	case AF_INET:
		addr.addr.a4 = inet_sk(sk)->inet_daddr;
		hash = (__force unsigned int) addr.addr.a4;
		break;
	case AF_INET6:
		*(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr;
		hash = ipv6_addr_hash(&inet6_sk(sk)->daddr);
		break;
	default:
		return NULL;
	}

	net = dev_net(dst->dev);
	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);

	tm = __tcp_get_metrics(&addr, net, hash);
	reclaim = false;
	if (tm == TCP_METRICS_RECLAIM_PTR) {
		reclaim = true;
		tm = NULL;
	}
	if (!tm && create)
		tm = tcpm_new(dst, &addr, hash, reclaim);
	else
		tcpm_check_stamp(tm, dst);

	return tm;
}
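
/* Minimal sketch of the caller pattern used by the functions below
 * (variable names illustrative): lookups run under rcu_read_lock(), and
 * create == true allocates or reclaims a block when none exists yet.
 *
 *	rcu_read_lock();
 *	tm = tcp_get_metrics(sk, dst, true);
 *	if (tm)
 *		val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 *	rcu_read_unlock();
 */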

/* Save metrics learned by this TCP session.  This function is called
 * only when TCP finishes successfully, i.e. when it enters TIME-WAIT
 * or goes from LAST-ACK to CLOSE.
 */
void tcp_update_metrics(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_metrics_block *tm;
	unsigned long rtt;
	u32 val;
	int m;

	if (sysctl_tcp_nometrics_save || !dst)
		return;

	if (dst->flags & DST_HOST)
		dst_confirm(dst);

	rcu_read_lock();
	if (icsk->icsk_backoff || !tp->srtt) {
		/* This session failed to estimate rtt.  Why?
		 * Probably, no packets returned in time.  Reset our
		 * results.
		 */
		tm = tcp_get_metrics(sk, dst, false);
		if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT))
			tcp_metric_set(tm, TCP_METRIC_RTT, 0);
		goto out_unlock;
	} else
		tm = tcp_get_metrics(sk, dst, true);

	if (!tm)
		goto out_unlock;

	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
	m = rtt - tp->srtt;

	/* If the newly calculated rtt is larger than the stored one,
	 * store the new one.  Otherwise, use EWMA.  Remember, rtt
	 * overestimation is always better than underestimation.
	 */
	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
		if (m <= 0)
			rtt = tp->srtt;
		else
			rtt -= (m >> 3);
		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
	}

	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
		unsigned long var;

		if (m < 0)
			m = -m;

		/* Scale deviation to rttvar fixed point */
		m >>= 1;
		if (m < tp->mdev)
			m = tp->mdev;

		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
		if (m >= var)
			var = m;
		else
			var -= (var - m) >> 2;

		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
	}

	if (tcp_in_initial_slowstart(tp)) {
		/* Slow start still did not finish. */
		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
			if (val && (tp->snd_cwnd >> 1) > val)
				tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
					       tp->snd_cwnd >> 1);
		}
		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
			val = tcp_metric_get(tm, TCP_METRIC_CWND);
			if (tp->snd_cwnd > val)
				tcp_metric_set(tm, TCP_METRIC_CWND,
					       tp->snd_cwnd);
		}
	} else if (tp->snd_cwnd > tp->snd_ssthresh &&
		   icsk->icsk_ca_state == TCP_CA_Open) {
		/* Congestion avoidance phase, cwnd is reliable. */
		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
			tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
				       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
			val = tcp_metric_get(tm, TCP_METRIC_CWND);
			tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
		}
	} else {
		/* Else slow start did not finish, cwnd is nonsense,
		 * and ssthresh may also be invalid.
		 */
		if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
			val = tcp_metric_get(tm, TCP_METRIC_CWND);
			tcp_metric_set(tm, TCP_METRIC_CWND,
				       (val + tp->snd_ssthresh) >> 1);
		}
		if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
			if (val && tp->snd_ssthresh > val)
				tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
					       tp->snd_ssthresh);
		}
		if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
			val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
			if (val < tp->reordering &&
			    tp->reordering != sysctl_tcp_reordering)
				tcp_metric_set(tm, TCP_METRIC_REORDERING,
					       tp->reordering);
		}
	}
	tm->tcpm_stamp = jiffies;
out_unlock:
	rcu_read_unlock();
}
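
/* Worked example of the RTT merge above (values illustrative, in the
 * same scaled units as tp->srtt): with a cached RTT of 200 and a new
 * srtt of 120, m = 80 > 0, so the cached value decays with gain 1/8 to
 * 200 - (80 >> 3) = 190.  If instead m <= 0 (the new srtt is at least
 * as large as the cached value), the larger srtt is stored directly,
 * since overestimating RTT is safer than underestimating it.
 */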

/* Initialize metrics on socket. */

void tcp_init_metrics(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_metrics_block *tm;
	u32 val;

	if (dst == NULL)
		goto reset;

	dst_confirm(dst);

	rcu_read_lock();
	tm = tcp_get_metrics(sk, dst, true);
	if (!tm) {
		rcu_read_unlock();
		goto reset;
	}

	if (tcp_metric_locked(tm, TCP_METRIC_CWND))
		tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);

	val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
	if (val) {
		tp->snd_ssthresh = val;
		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
			tp->snd_ssthresh = tp->snd_cwnd_clamp;
	} else {
		/* ssthresh may have been reduced unnecessarily during
		 * the 3WHS.  Restore it back to its initial default.
		 */
		tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	}
	val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
	if (val && tp->reordering != val) {
		tcp_disable_fack(tp);
		tcp_disable_early_retrans(tp);
		tp->reordering = val;
	}

	val = tcp_metric_get(tm, TCP_METRIC_RTT);
	if (val == 0 || tp->srtt == 0) {
		rcu_read_unlock();
		goto reset;
	}
	/* The initial rtt is determined from the SYN, SYN-ACK exchange.
	 * That segment is small, so the rtt may appear much smaller than
	 * the real one.  Use per-destination memory to make it more
	 * realistic.
	 *
	 * A bit of theory.  RTT is the time that passes after a "normal"
	 * sized packet is sent until it is ACKed.  In normal
	 * circumstances, sending small packets forces the peer to delay
	 * ACKs, so the calculation remains correct.  The algorithm is
	 * adaptive and, provided we follow the specs, it NEVER
	 * underestimates RTT.  BUT!  If the peer tries clever tricks
	 * such as "quick acks" for long enough to drive the RTT down to
	 * a low value, and then abruptly stops doing so and starts to
	 * delay ACKs, expect trouble.
	 */
	val = msecs_to_jiffies(val);
	if (val > tp->srtt) {
		tp->srtt = val;
		tp->rtt_seq = tp->snd_nxt;
	}
	val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
	if (val > tp->mdev) {
		tp->mdev = val;
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
	}
	rcu_read_unlock();

	tcp_set_rto(sk);
reset:
	if (tp->srtt == 0) {
		/* RFC 6298, section 5.7: we've failed to get a valid RTT
		 * sample from the 3WHS.  This is most likely due to
		 * retransmission, including a spurious one.  Reset the
		 * RTO back to 3 secs from the more aggressive 1 sec to
		 * avoid more spurious retransmission.
		 */
		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
	}
	/* Cut cwnd down to 1 per RFC 5681 if the SYN or SYN-ACK has been
	 * retransmitted.  In light of RFC 6298's more aggressive 1 sec
	 * initRTO, we only reset cwnd when more than one SYN/SYN-ACK
	 * retransmission has occurred.
	 */
	if (tp->total_retrans > 1)
		tp->snd_cwnd = 1;
	else
		tp->snd_cwnd = tcp_init_cwnd(tp, dst);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}
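
/* Note on the fallback path above: when neither the cache nor the
 * socket provides a usable RTT sample (typically because the SYN or
 * SYN-ACK was retransmitted), the RTO is reset to TCP_TIMEOUT_FALLBACK
 * (3 seconds, per RFC 6298), and cwnd is cut to 1 only when more than
 * one SYN/SYN-ACK retransmission occurred; otherwise the usual initial
 * cwnd from tcp_init_cwnd() is kept.
 */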

bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
{
	struct tcp_metrics_block *tm;
	bool ret;

	if (!dst)
		return false;

	rcu_read_lock();
	tm = __tcp_get_metrics_req(req, dst);
	if (paws_check) {
		if (tm &&
		    (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
		    (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
			ret = false;
		else
			ret = true;
	} else {
		if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
			ret = true;
		else
			ret = false;
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(tcp_peer_is_proven);

void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
{
	struct tcp_metrics_block *tm;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, dst, true);
	if (tm) {
		struct tcp_sock *tp = tcp_sk(sk);

		if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
			tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
			tp->rx_opt.ts_recent = tm->tcpm_ts;
		}
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);

/* VJ's idea.  Save last timestamp seen from this destination and hold
 * it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter
 * synchronized state.
 */
bool tcp_remember_stamp(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	bool ret = false;

	if (dst) {
		struct tcp_metrics_block *tm;

		rcu_read_lock();
		tm = tcp_get_metrics(sk, dst, true);
		if (tm) {
			struct tcp_sock *tp = tcp_sk(sk);

			if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
			    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
			     tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
				tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
				tm->tcpm_ts = tp->rx_opt.ts_recent;
			}
			ret = true;
		}
		rcu_read_unlock();
	}
	return ret;
}

bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct tcp_metrics_block *tm;
	bool ret = false;

	rcu_read_lock();
	tm = __tcp_get_metrics_tw(tw);
	if (tm) {
		const struct tcp_timewait_sock *tcptw;
		struct sock *sk = (struct sock *) tw;

		tcptw = tcp_twsk(sk);
		if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
		    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
		     tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
			tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
			tm->tcpm_ts = tcptw->tw_ts_recent;
		}
		ret = true;
	}
	rcu_read_unlock();

	return ret;
}
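
/* The Fast Open state below is guarded by a seqlock rather than the
 * hash spinlock, presumably to keep reads cheap: readers in
 * tcp_fastopen_cache_get() retry with read_seqbegin()/read_seqretry()
 * until they see a consistent snapshot, while the writer in
 * tcp_fastopen_cache_set() publishes updates under write_seqlock_bh().
 */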

static DEFINE_SEQLOCK(fastopen_seqlock);

void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
			    struct tcp_fastopen_cookie *cookie,
			    int *syn_loss, unsigned long *last_syn_loss)
{
	struct tcp_metrics_block *tm;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, __sk_dst_get(sk), false);
	if (tm) {
		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
		unsigned int seq;

		do {
			seq = read_seqbegin(&fastopen_seqlock);
			if (tfom->mss)
				*mss = tfom->mss;
			*cookie = tfom->cookie;
			*syn_loss = tfom->syn_loss;
			*last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
		} while (read_seqretry(&fastopen_seqlock, seq));
	}
	rcu_read_unlock();
}

void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
			    struct tcp_fastopen_cookie *cookie, bool syn_lost)
{
	struct tcp_metrics_block *tm;

	rcu_read_lock();
	tm = tcp_get_metrics(sk, __sk_dst_get(sk), true);
	if (tm) {
		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;

		write_seqlock_bh(&fastopen_seqlock);
		tfom->mss = mss;
		if (cookie->len > 0)
			tfom->cookie = *cookie;
		if (syn_lost) {
			++tfom->syn_loss;
			tfom->last_syn_loss = jiffies;
		} else
			tfom->syn_loss = 0;
		write_sequnlock_bh(&fastopen_seqlock);
	}
	rcu_read_unlock();
}

static unsigned int tcpmhash_entries;
static int __init set_tcpmhash_entries(char *str)
{
	ssize_t ret;

	if (!str)
		return 0;

	ret = kstrtouint(str, 0, &tcpmhash_entries);
	if (ret)
		return 0;

	return 1;
}
__setup("tcpmhash_entries=", set_tcpmhash_entries);

static int __net_init tcp_net_metrics_init(struct net *net)
{
	size_t size;
	unsigned int slots;

	slots = tcpmhash_entries;
	if (!slots) {
		if (totalram_pages >= 128 * 1024)
			slots = 16 * 1024;
		else
			slots = 8 * 1024;
	}

	net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
	size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;

	net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL);
	if (!net->ipv4.tcp_metrics_hash)
		return -ENOMEM;

	return 0;
}

static void __net_exit tcp_net_metrics_exit(struct net *net)
{
	unsigned int i;

	for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log); i++) {
		struct tcp_metrics_block *tm, *next;

		tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
		while (tm) {
			next = rcu_dereference_protected(tm->tcpm_next, 1);
			kfree(tm);
			tm = next;
		}
	}
	kfree(net->ipv4.tcp_metrics_hash);
}

static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
	.init = tcp_net_metrics_init,
	.exit = tcp_net_metrics_exit,
};

void __init tcp_metrics_init(void)
{
	register_pernet_subsys(&tcp_net_metrics_ops);
}
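
/* Sizing sketch for the per-namespace hash table above (assuming 4 KiB
 * pages and 8-byte pointers, neither of which is guaranteed here): with
 * at least 128 * 1024 pages of RAM (512 MiB), slots = 16 * 1024, so
 * tcp_metrics_hash_log = 14 and the table occupies
 * 16384 * sizeof(struct tcpm_hash_bucket) = 128 KiB per netns; smaller
 * systems get 8 * 1024 slots (64 KiB).  The default can be overridden
 * with the tcpmhash_entries= boot parameter.
 */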