tcp_input.c (650c919798c71fb34b77a6f2ba03a06907f06a76) | tcp_input.c (c3a8d9474684d391b0afc3970d9b249add15ec07) |
---|---|
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Implementation of the Transmission Control Protocol(TCP). 7 * 8 * Authors: Ross Biro --- 95 unchanged lines hidden (view full) --- 104 105#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 106#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 107#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ 108#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ 109#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ 110#define FLAG_DATA_SACKED 0x20 /* New SACK. */ 111#define FLAG_ECE 0x40 /* ECE in this ACK */ | 1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Implementation of the Transmission Control Protocol(TCP). 7 * 8 * Authors: Ross Biro --- 95 unchanged lines hidden (view full) --- 104 105#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 106#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 107#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ 108#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ 109#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ 110#define FLAG_DATA_SACKED 0x20 /* New SACK. */ 111#define FLAG_ECE 0x40 /* ECE in this ACK */ |
112#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ |
|
112#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 113#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ 114#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 115#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 116#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 117#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ 118 119#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) --- 71 unchanged lines hidden (view full) --- 191 icsk->icsk_ack.pingpong = 0; 192 icsk->icsk_ack.ato = TCP_ATO_MIN; 193} 194 195/* Send ACKs quickly, if "quick" count is not exhausted 196 * and the session is not interactive. 197 */ 198 | 113#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 114#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ 115#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 116#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 117#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 118#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ 119 120#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) --- 71 unchanged lines hidden (view full) --- 192 icsk->icsk_ack.pingpong = 0; 193 icsk->icsk_ack.ato = TCP_ATO_MIN; 194} 195 196/* Send ACKs quickly, if "quick" count is not exhausted 197 * and the session is not interactive. 198 */ 199 |
199static inline bool tcp_in_quickack_mode(const struct sock *sk) | 200static bool tcp_in_quickack_mode(struct sock *sk) |
200{ 201 const struct inet_connection_sock *icsk = inet_csk(sk); | 201{ 202 const struct inet_connection_sock *icsk = inet_csk(sk); |
203 const struct dst_entry *dst = __sk_dst_get(sk); |
|
202 | 204 |
203 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; | 205 return (dst && dst_metric(dst, RTAX_QUICKACK)) || 206 (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong); |
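A note on the hunk above: the right-hand version forces quick ACKs whenever the socket's route carries a non-zero RTAX_QUICKACK metric, keeping the old quick-count heuristic as the fallback. A standalone sketch of the resulting predicate, with the dst lookup reduced to a boolean for illustration; the helper name and parameters are hypothetical, not the kernel's code:

```c
#include <stdbool.h>

/* Illustration only. route_quickack stands for dst_metric(dst, RTAX_QUICKACK),
 * which userspace can set per route (e.g. iproute2's "quickack 1" attribute);
 * quick and pingpong mirror icsk->icsk_ack.quick and icsk->icsk_ack.pingpong. */
static bool in_quickack_mode(bool route_quickack, unsigned int quick, bool pingpong)
{
        return route_quickack || (quick && !pingpong);
}
```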
204} 205 206static void tcp_ecn_queue_cwr(struct tcp_sock *tp) 207{ 208 if (tp->ecn_flags & TCP_ECN_OK) 209 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 210} 211 --- 533 unchanged lines hidden (view full) --- 745} 746 747/* Set the sk_pacing_rate to allow proper sizing of TSO packets. 748 * Note: TCP stack does not yet implement pacing. 749 * FQ packet scheduler can be used to implement cheap but effective 750 * TCP pacing, to smooth the burst on large writes when packets 751 * in flight is significantly lower than cwnd (or rwin) 752 */ | 207} 208 209static void tcp_ecn_queue_cwr(struct tcp_sock *tp) 210{ 211 if (tp->ecn_flags & TCP_ECN_OK) 212 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 213} 214 --- 533 unchanged lines hidden (view full) --- 748} 749 750/* Set the sk_pacing_rate to allow proper sizing of TSO packets. 751 * Note: TCP stack does not yet implement pacing. 752 * FQ packet scheduler can be used to implement cheap but effective 753 * TCP pacing, to smooth the burst on large writes when packets 754 * in flight is significantly lower than cwnd (or rwin) 755 */ |
756int sysctl_tcp_pacing_ss_ratio __read_mostly = 200; 757int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; 758 |
|
753static void tcp_update_pacing_rate(struct sock *sk) 754{ 755 const struct tcp_sock *tp = tcp_sk(sk); 756 u64 rate; 757 758 /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ | 759static void tcp_update_pacing_rate(struct sock *sk) 760{ 761 const struct tcp_sock *tp = tcp_sk(sk); 762 u64 rate; 763 764 /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ |
759 rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3); | 765 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3); |
760 | 766 |
767 /* current rate is (cwnd * mss) / srtt 768 * In Slow Start [1], set sk_pacing_rate to 200 % the current rate. 769 * In Congestion Avoidance phase, set it to 120 % the current rate. 770 * 771 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh) 772 * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching 773 * end of slow start and should slow down. 774 */ 775 if (tp->snd_cwnd < tp->snd_ssthresh / 2) 776 rate *= sysctl_tcp_pacing_ss_ratio; 777 else 778 rate *= sysctl_tcp_pacing_ca_ratio; 779 |
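The branch above is the core of the pacing change: the hard-coded 200 % factor is replaced by sysctl_tcp_pacing_ss_ratio (200) during slow start and sysctl_tcp_pacing_ca_ratio (120) in congestion avoidance. A rough userspace sketch of the arithmetic, assuming srtt here is the kernel's srtt_us value (stored as 8x the smoothed RTT in microseconds); the function name and example numbers are invented for illustration:

```c
#include <stdint.h>
#include <stdio.h>

/* rate = ratio% * mss * cwnd / srtt; the <<3 in the constant cancels the
 * 8x scaling of the stored srtt, so the result comes out in bytes/sec. */
static uint64_t pacing_rate(uint32_t mss, uint32_t cwnd,
                            uint32_t srtt_us_shifted3, uint32_t ratio_percent)
{
        uint64_t rate = (uint64_t)mss * ((1000000ULL / 100) << 3);

        rate *= ratio_percent;
        rate *= cwnd;
        return srtt_us_shifted3 ? rate / srtt_us_shifted3 : rate;
}

int main(void)
{
        /* 1448-byte MSS, cwnd of 10, 20 ms srtt (stored as 8 * 20000 us), slow start */
        printf("%llu bytes/sec\n",
               (unsigned long long)pacing_rate(1448, 10, 8 * 20000, 200));
        return 0;
}
```

With these inputs the sketch prints 1448000 bytes/sec, i.e. 200 % of the raw cwnd * mss / srtt rate, matching the slow-start ratio.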
|
761 rate *= max(tp->snd_cwnd, tp->packets_out); 762 763 if (likely(tp->srtt_us)) 764 do_div(rate, tp->srtt_us); 765 766 /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate 767 * without any lock. We want to make sure compiler wont store 768 * intermediate values in this location. --- 263 unchanged lines hidden (view full) --- 1032 * Event "B". Later note: FACK people cheated me again 8), we have to account 1033 * for reordering! Ugly, but should help. 1034 * 1035 * Search retransmitted skbs from write_queue that were sent when snd_nxt was 1036 * less than what is now known to be received by the other end (derived from 1037 * highest SACK block). Also calculate the lowest snd_nxt among the remaining 1038 * retransmitted skbs to avoid some costly processing per ACKs. 1039 */ | 780 rate *= max(tp->snd_cwnd, tp->packets_out); 781 782 if (likely(tp->srtt_us)) 783 do_div(rate, tp->srtt_us); 784 785 /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate 786 * without any lock. We want to make sure compiler wont store 787 * intermediate values in this location. --- 263 unchanged lines hidden (view full) --- 1051 * Event "B". Later note: FACK people cheated me again 8), we have to account 1052 * for reordering! Ugly, but should help. 1053 * 1054 * Search retransmitted skbs from write_queue that were sent when snd_nxt was 1055 * less than what is now known to be received by the other end (derived from 1056 * highest SACK block). Also calculate the lowest snd_nxt among the remaining 1057 * retransmitted skbs to avoid some costly processing per ACKs. 1058 */ |
1040static void tcp_mark_lost_retrans(struct sock *sk) | 1059static void tcp_mark_lost_retrans(struct sock *sk, int *flag) |
1041{ 1042 const struct inet_connection_sock *icsk = inet_csk(sk); 1043 struct tcp_sock *tp = tcp_sk(sk); 1044 struct sk_buff *skb; 1045 int cnt = 0; 1046 u32 new_low_seq = tp->snd_nxt; 1047 u32 received_upto = tcp_highest_sack_seq(tp); 1048 --- 24 unchanged lines hidden (view full) --- 1073 * Whether FACK should check here for tp->reordering segs 1074 * in-between one could argue for either way (it would be 1075 * rather simple to implement as we could count fack_count 1076 * during the walk and do tp->fackets_out - fack_count). 1077 */ 1078 if (after(received_upto, ack_seq)) { 1079 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1080 tp->retrans_out -= tcp_skb_pcount(skb); | 1060{ 1061 const struct inet_connection_sock *icsk = inet_csk(sk); 1062 struct tcp_sock *tp = tcp_sk(sk); 1063 struct sk_buff *skb; 1064 int cnt = 0; 1065 u32 new_low_seq = tp->snd_nxt; 1066 u32 received_upto = tcp_highest_sack_seq(tp); 1067 --- 24 unchanged lines hidden (view full) --- 1092 * Whether FACK should check here for tp->reordering segs 1093 * in-between one could argue for either way (it would be 1094 * rather simple to implement as we could count fack_count 1095 * during the walk and do tp->fackets_out - fack_count). 1096 */ 1097 if (after(received_upto, ack_seq)) { 1098 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1099 tp->retrans_out -= tcp_skb_pcount(skb); |
1081 | 1100 *flag |= FLAG_LOST_RETRANS; |
1082 tcp_skb_mark_lost_uncond_verify(tp, skb); 1083 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); 1084 } else { 1085 if (before(ack_seq, new_low_seq)) 1086 new_low_seq = ack_seq; 1087 cnt += tcp_skb_pcount(skb); 1088 } 1089 } --- 723 unchanged lines hidden (view full) --- 1813 } 1814 for (j = 0; j < used_sacks; j++) 1815 tp->recv_sack_cache[i++] = sp[j]; 1816 1817 if ((state->reord < tp->fackets_out) && 1818 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1819 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); 1820 | 1101 tcp_skb_mark_lost_uncond_verify(tp, skb); 1102 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); 1103 } else { 1104 if (before(ack_seq, new_low_seq)) 1105 new_low_seq = ack_seq; 1106 cnt += tcp_skb_pcount(skb); 1107 } 1108 } --- 723 unchanged lines hidden (view full) --- 1832 } 1833 for (j = 0; j < used_sacks; j++) 1834 tp->recv_sack_cache[i++] = sp[j]; 1835 1836 if ((state->reord < tp->fackets_out) && 1837 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1838 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); 1839 |
1821 tcp_mark_lost_retrans(sk); | 1840 tcp_mark_lost_retrans(sk, &state->flag); |
1822 tcp_verify_left_out(tp); 1823out: 1824 1825#if FASTRETRANS_DEBUG > 0 1826 WARN_ON((int)tp->sacked_out < 0); 1827 WARN_ON((int)tp->lost_out < 0); 1828 WARN_ON((int)tp->retrans_out < 0); 1829 WARN_ON((int)tcp_packets_in_flight(tp) < 0); --- 639 unchanged lines hidden (view full) --- 2469 inet_csk(sk)->icsk_retransmits = 0; 2470 if (frto_undo || tcp_is_sack(tp)) 2471 tcp_set_ca_state(sk, TCP_CA_Open); 2472 return true; 2473 } 2474 return false; 2475} 2476 | 1841 tcp_verify_left_out(tp); 1842out: 1843 1844#if FASTRETRANS_DEBUG > 0 1845 WARN_ON((int)tp->sacked_out < 0); 1846 WARN_ON((int)tp->lost_out < 0); 1847 WARN_ON((int)tp->retrans_out < 0); 1848 WARN_ON((int)tcp_packets_in_flight(tp) < 0); --- 639 unchanged lines hidden (view full) --- 2488 inet_csk(sk)->icsk_retransmits = 0; 2489 if (frto_undo || tcp_is_sack(tp)) 2490 tcp_set_ca_state(sk, TCP_CA_Open); 2491 return true; 2492 } 2493 return false; 2494} 2495 |
2477/* The cwnd reduction in CWR and Recovery use the PRR algorithm 2478 * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/ | 2496/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937. |
2479 * It computes the number of packets to send (sndcnt) based on packets newly 2480 * delivered: 2481 * 1) If the packets in flight is larger than ssthresh, PRR spreads the 2482 * cwnd reductions across a full RTT. | 2497 * It computes the number of packets to send (sndcnt) based on packets newly 2498 * delivered: 2499 * 1) If the packets in flight is larger than ssthresh, PRR spreads the 2500 * cwnd reductions across a full RTT. |
2483 * 2) If packets in flight is lower than ssthresh (such as due to excess 2484 * losses and/or application stalls), do not perform any further cwnd 2485 * reductions, but instead slow start up to ssthresh. | 2501 * 2) Otherwise PRR uses packet conservation to send as much as delivered. 2502 * But when the retransmits are acked without further losses, PRR 2503 * slow starts cwnd up to ssthresh to speed up the recovery. |
2486 */ 2487static void tcp_init_cwnd_reduction(struct sock *sk) 2488{ 2489 struct tcp_sock *tp = tcp_sk(sk); 2490 2491 tp->high_seq = tp->snd_nxt; 2492 tp->tlp_high_seq = 0; 2493 tp->snd_cwnd_cnt = 0; 2494 tp->prior_cwnd = tp->snd_cwnd; 2495 tp->prr_delivered = 0; 2496 tp->prr_out = 0; 2497 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); 2498 tcp_ecn_queue_cwr(tp); 2499} 2500 2501static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, | 2504 */ 2505static void tcp_init_cwnd_reduction(struct sock *sk) 2506{ 2507 struct tcp_sock *tp = tcp_sk(sk); 2508 2509 tp->high_seq = tp->snd_nxt; 2510 tp->tlp_high_seq = 0; 2511 tp->snd_cwnd_cnt = 0; 2512 tp->prior_cwnd = tp->snd_cwnd; 2513 tp->prr_delivered = 0; 2514 tp->prr_out = 0; 2515 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); 2516 tcp_ecn_queue_cwr(tp); 2517} 2518 2519static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, |
2502 int fast_rexmit) | 2520 int fast_rexmit, int flag) |
2503{ 2504 struct tcp_sock *tp = tcp_sk(sk); 2505 int sndcnt = 0; 2506 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); 2507 int newly_acked_sacked = prior_unsacked - 2508 (tp->packets_out - tp->sacked_out); 2509 2510 tp->prr_delivered += newly_acked_sacked; | 2521{ 2522 struct tcp_sock *tp = tcp_sk(sk); 2523 int sndcnt = 0; 2524 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); 2525 int newly_acked_sacked = prior_unsacked - 2526 (tp->packets_out - tp->sacked_out); 2527 2528 tp->prr_delivered += newly_acked_sacked; |
2511 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { | 2529 if (delta < 0) { |
2512 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + 2513 tp->prior_cwnd - 1; 2514 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; | 2530 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + 2531 tp->prior_cwnd - 1; 2532 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; |
2515 } else { | 2533 } else if ((flag & FLAG_RETRANS_DATA_ACKED) && 2534 !(flag & FLAG_LOST_RETRANS)) { |
2516 sndcnt = min_t(int, delta, 2517 max_t(int, tp->prr_delivered - tp->prr_out, 2518 newly_acked_sacked) + 1); | 2535 sndcnt = min_t(int, delta, 2536 max_t(int, tp->prr_delivered - tp->prr_out, 2537 newly_acked_sacked) + 1); |
2538 } else { 2539 sndcnt = min(delta, newly_acked_sacked); |
|
2519 } | 2540 } |
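A toy walk-through of the proportional branch above (the numbers are invented for illustration; this is not kernel code): after cwnd is halved from 20 to an ssthresh of 10, four newly delivered packets entitle the sender to ceil(10 * 4 / 20) = 2 transmissions, minus the one already sent during recovery.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t prior_cwnd = 20, ssthresh = 10;        /* cwnd before loss, new ssthresh */
        uint32_t prr_delivered = 4, prr_out = 1;        /* delivered / sent so far in recovery */

        /* sndcnt = ceil(ssthresh * prr_delivered / prior_cwnd) - prr_out */
        uint64_t dividend = (uint64_t)ssthresh * prr_delivered + prior_cwnd - 1;
        int sndcnt = (int)(dividend / prior_cwnd) - (int)prr_out;

        printf("proportional phase: may send %d more segment(s)\n", sndcnt);   /* prints 1 */
        return 0;
}
```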
2520 | |
2521 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); 2522 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; 2523} 2524 2525static inline void tcp_end_cwnd_reduction(struct sock *sk) 2526{ 2527 struct tcp_sock *tp = tcp_sk(sk); 2528 --- 44 unchanged lines hidden (view full) --- 2573 tp->retrans_stamp = 0; 2574 2575 if (flag & FLAG_ECE) 2576 tcp_enter_cwr(sk); 2577 2578 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2579 tcp_try_keep_open(sk); 2580 } else { | 2541 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); 2542 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; 2543} 2544 2545static inline void tcp_end_cwnd_reduction(struct sock *sk) 2546{ 2547 struct tcp_sock *tp = tcp_sk(sk); 2548 --- 44 unchanged lines hidden (view full) --- 2593 tp->retrans_stamp = 0; 2594 2595 if (flag & FLAG_ECE) 2596 tcp_enter_cwr(sk); 2597 2598 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2599 tcp_try_keep_open(sk); 2600 } else { |
2581 tcp_cwnd_reduction(sk, prior_unsacked, 0); | 2601 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); |
2582 } 2583} 2584 2585static void tcp_mtup_probe_failed(struct sock *sk) 2586{ 2587 struct inet_connection_sock *icsk = inet_csk(sk); 2588 2589 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; 2590 icsk->icsk_mtup.probe_size = 0; | 2602 } 2603} 2604 2605static void tcp_mtup_probe_failed(struct sock *sk) 2606{ 2607 struct inet_connection_sock *icsk = inet_csk(sk); 2608 2609 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; 2610 icsk->icsk_mtup.probe_size = 0; |
2611 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); |
|
2591} 2592 2593static void tcp_mtup_probe_success(struct sock *sk) 2594{ 2595 struct tcp_sock *tp = tcp_sk(sk); 2596 struct inet_connection_sock *icsk = inet_csk(sk); 2597 2598 /* FIXME: breaks with very large cwnd */ 2599 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2600 tp->snd_cwnd = tp->snd_cwnd * 2601 tcp_mss_to_mtu(sk, tp->mss_cache) / 2602 icsk->icsk_mtup.probe_size; 2603 tp->snd_cwnd_cnt = 0; 2604 tp->snd_cwnd_stamp = tcp_time_stamp; 2605 tp->snd_ssthresh = tcp_current_ssthresh(sk); 2606 2607 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; 2608 icsk->icsk_mtup.probe_size = 0; 2609 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 2612} 2613 2614static void tcp_mtup_probe_success(struct sock *sk) 2615{ 2616 struct tcp_sock *tp = tcp_sk(sk); 2617 struct inet_connection_sock *icsk = inet_csk(sk); 2618 2619 /* FIXME: breaks with very large cwnd */ 2620 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2621 tp->snd_cwnd = tp->snd_cwnd * 2622 tcp_mss_to_mtu(sk, tp->mss_cache) / 2623 icsk->icsk_mtup.probe_size; 2624 tp->snd_cwnd_cnt = 0; 2625 tp->snd_cwnd_stamp = tcp_time_stamp; 2626 tp->snd_ssthresh = tcp_current_ssthresh(sk); 2627 2628 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; 2629 icsk->icsk_mtup.probe_size = 0; 2630 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
2631 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); |
|
2610} 2611 2612/* Do a simple retransmit without using the backoff mechanisms in 2613 * tcp_timer. This is used for path mtu discovery. 2614 * The socket is already locked here. 2615 */ 2616void tcp_simple_retransmit(struct sock *sk) 2617{ --- 52 unchanged lines hidden (view full) --- 2670 else 2671 mib_idx = LINUX_MIB_TCPSACKRECOVERY; 2672 2673 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2674 2675 tp->prior_ssthresh = 0; 2676 tcp_init_undo(tp); 2677 | 2632} 2633 2634/* Do a simple retransmit without using the backoff mechanisms in 2635 * tcp_timer. This is used for path mtu discovery. 2636 * The socket is already locked here. 2637 */ 2638void tcp_simple_retransmit(struct sock *sk) 2639{ --- 52 unchanged lines hidden (view full) --- 2692 else 2693 mib_idx = LINUX_MIB_TCPSACKRECOVERY; 2694 2695 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2696 2697 tp->prior_ssthresh = 0; 2698 tcp_init_undo(tp); 2699 |
2678 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 2700 if (!tcp_in_cwnd_reduction(sk)) { |
2679 if (!ece_ack) 2680 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2681 tcp_init_cwnd_reduction(sk); 2682 } 2683 tcp_set_ca_state(sk, TCP_CA_Recovery); 2684} 2685 2686/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are --- 43 unchanged lines hidden (view full) --- 2730 else if (flag & FLAG_SND_UNA_ADVANCED) 2731 tcp_reset_reno_sack(tp); 2732 } 2733 tcp_xmit_retransmit_queue(sk); 2734} 2735 2736/* Undo during fast recovery after partial ACK. */ 2737static bool tcp_try_undo_partial(struct sock *sk, const int acked, | 2701 if (!ece_ack) 2702 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2703 tcp_init_cwnd_reduction(sk); 2704 } 2705 tcp_set_ca_state(sk, TCP_CA_Recovery); 2706} 2707 2708/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are --- 43 unchanged lines hidden (view full) --- 2752 else if (flag & FLAG_SND_UNA_ADVANCED) 2753 tcp_reset_reno_sack(tp); 2754 } 2755 tcp_xmit_retransmit_queue(sk); 2756} 2757 2758/* Undo during fast recovery after partial ACK. */ 2759static bool tcp_try_undo_partial(struct sock *sk, const int acked, |
2738 const int prior_unsacked) | 2760 const int prior_unsacked, int flag) |
2739{ 2740 struct tcp_sock *tp = tcp_sk(sk); 2741 2742 if (tp->undo_marker && tcp_packet_delayed(tp)) { 2743 /* Plain luck! Hole if filled with delayed 2744 * packet, rather than with a retransmit. 2745 */ 2746 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 2747 2748 /* We are getting evidence that the reordering degree is higher 2749 * than we realized. If there are no retransmits out then we 2750 * can undo. Otherwise we clock out new packets but do not 2751 * mark more packets lost or retransmit more. 2752 */ 2753 if (tp->retrans_out) { | 2761{ 2762 struct tcp_sock *tp = tcp_sk(sk); 2763 2764 if (tp->undo_marker && tcp_packet_delayed(tp)) { 2765 /* Plain luck! Hole if filled with delayed 2766 * packet, rather than with a retransmit. 2767 */ 2768 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 2769 2770 /* We are getting evidence that the reordering degree is higher 2771 * than we realized. If there are no retransmits out then we 2772 * can undo. Otherwise we clock out new packets but do not 2773 * mark more packets lost or retransmit more. 2774 */ 2775 if (tp->retrans_out) { |
2754 tcp_cwnd_reduction(sk, prior_unsacked, 0); | 2776 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); |
2755 return true; 2756 } 2757 2758 if (!tcp_any_retrans_done(sk)) 2759 tp->retrans_stamp = 0; 2760 2761 DBGUNDO(sk, "partial recovery"); 2762 tcp_undo_cwnd_reduction(sk, true); --- 70 unchanged lines hidden (view full) --- 2833 2834 /* E. Process state. */ 2835 switch (icsk->icsk_ca_state) { 2836 case TCP_CA_Recovery: 2837 if (!(flag & FLAG_SND_UNA_ADVANCED)) { 2838 if (tcp_is_reno(tp) && is_dupack) 2839 tcp_add_reno_sack(sk); 2840 } else { | 2777 return true; 2778 } 2779 2780 if (!tcp_any_retrans_done(sk)) 2781 tp->retrans_stamp = 0; 2782 2783 DBGUNDO(sk, "partial recovery"); 2784 tcp_undo_cwnd_reduction(sk, true); --- 70 unchanged lines hidden (view full) --- 2855 2856 /* E. Process state. */ 2857 switch (icsk->icsk_ca_state) { 2858 case TCP_CA_Recovery: 2859 if (!(flag & FLAG_SND_UNA_ADVANCED)) { 2860 if (tcp_is_reno(tp) && is_dupack) 2861 tcp_add_reno_sack(sk); 2862 } else { |
2841 if (tcp_try_undo_partial(sk, acked, prior_unsacked)) | 2863 if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) |
2842 return; 2843 /* Partial ACK arrived. Force fast retransmit. */ 2844 do_lost = tcp_is_reno(tp) || 2845 tcp_fackets_out(tp) > tp->reordering; 2846 } 2847 if (tcp_try_undo_dsack(sk)) { 2848 tcp_try_keep_open(sk); 2849 return; 2850 } 2851 break; 2852 case TCP_CA_Loss: 2853 tcp_process_loss(sk, flag, is_dupack); | 2864 return; 2865 /* Partial ACK arrived. Force fast retransmit. */ 2866 do_lost = tcp_is_reno(tp) || 2867 tcp_fackets_out(tp) > tp->reordering; 2868 } 2869 if (tcp_try_undo_dsack(sk)) { 2870 tcp_try_keep_open(sk); 2871 return; 2872 } 2873 break; 2874 case TCP_CA_Loss: 2875 tcp_process_loss(sk, flag, is_dupack); |
2854 if (icsk->icsk_ca_state != TCP_CA_Open) | 2876 if (icsk->icsk_ca_state != TCP_CA_Open && 2877 !(flag & FLAG_LOST_RETRANS)) |
2855 return; | 2878 return; |
2856 /* Fall through to processing in Open state. */ | 2879 /* Change state if cwnd is undone or retransmits are lost */ |
2857 default: 2858 if (tcp_is_reno(tp)) { 2859 if (flag & FLAG_SND_UNA_ADVANCED) 2860 tcp_reset_reno_sack(tp); 2861 if (is_dupack) 2862 tcp_add_reno_sack(sk); 2863 } 2864 --- 18 unchanged lines hidden (view full) --- 2883 2884 /* Otherwise enter Recovery state */ 2885 tcp_enter_recovery(sk, (flag & FLAG_ECE)); 2886 fast_rexmit = 1; 2887 } 2888 2889 if (do_lost) 2890 tcp_update_scoreboard(sk, fast_rexmit); | 2880 default: 2881 if (tcp_is_reno(tp)) { 2882 if (flag & FLAG_SND_UNA_ADVANCED) 2883 tcp_reset_reno_sack(tp); 2884 if (is_dupack) 2885 tcp_add_reno_sack(sk); 2886 } 2887 --- 18 unchanged lines hidden (view full) --- 2906 2907 /* Otherwise enter Recovery state */ 2908 tcp_enter_recovery(sk, (flag & FLAG_ECE)); 2909 fast_rexmit = 1; 2910 } 2911 2912 if (do_lost) 2913 tcp_update_scoreboard(sk, fast_rexmit); |
2891 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); | 2914 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); |
2892 tcp_xmit_retransmit_queue(sk); 2893} 2894 2895static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, 2896 long seq_rtt_us, long sack_rtt_us) 2897{ 2898 const struct tcp_sock *tp = tcp_sk(sk); 2899 --- 420 unchanged lines hidden (view full) --- 3320 tp->snd_wnd = nwin; 3321 3322 /* Note, it is the only place, where 3323 * fast path is recovered for sending TCP. 3324 */ 3325 tp->pred_flags = 0; 3326 tcp_fast_path_check(sk); 3327 | 2915 tcp_xmit_retransmit_queue(sk); 2916} 2917 2918static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, 2919 long seq_rtt_us, long sack_rtt_us) 2920{ 2921 const struct tcp_sock *tp = tcp_sk(sk); 2922 --- 420 unchanged lines hidden (view full) --- 3343 tp->snd_wnd = nwin; 3344 3345 /* Note, it is the only place, where 3346 * fast path is recovered for sending TCP. 3347 */ 3348 tp->pred_flags = 0; 3349 tcp_fast_path_check(sk); 3350 |
3351 if (tcp_send_head(sk)) 3352 tcp_slow_start_after_idle_check(sk); 3353 |
|
3328 if (nwin > tp->max_window) { 3329 tp->max_window = nwin; 3330 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); 3331 } 3332 } 3333 } 3334 3335 tcp_snd_una_update(tp, ack); --- 221 unchanged lines hidden (view full) --- 3557 goto no_queue; 3558 3559 /* See if we can take anything off of the retransmit queue. */ 3560 acked = tp->packets_out; 3561 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, 3562 &sack_state); 3563 acked -= tp->packets_out; 3564 | 3354 if (nwin > tp->max_window) { 3355 tp->max_window = nwin; 3356 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); 3357 } 3358 } 3359 } 3360 3361 tcp_snd_una_update(tp, ack); --- 221 unchanged lines hidden (view full) --- 3583 goto no_queue; 3584 3585 /* See if we can take anything off of the retransmit queue. */ 3586 acked = tp->packets_out; 3587 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, 3588 &sack_state); 3589 acked -= tp->packets_out; 3590 |
3565 /* Advance cwnd if state allows */ 3566 if (tcp_may_raise_cwnd(sk, flag)) 3567 tcp_cong_avoid(sk, ack, acked); 3568 | |
3569 if (tcp_ack_is_dubious(sk, flag)) { 3570 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3571 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3572 is_dupack, flag); 3573 } 3574 if (tp->tlp_high_seq) 3575 tcp_process_tlp_ack(sk, ack, flag); 3576 | 3591 if (tcp_ack_is_dubious(sk, flag)) { 3592 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3593 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3594 is_dupack, flag); 3595 } 3596 if (tp->tlp_high_seq) 3597 tcp_process_tlp_ack(sk, ack, flag); 3598 |
3599 /* Advance cwnd if state allows */ 3600 if (tcp_may_raise_cwnd(sk, flag)) 3601 tcp_cong_avoid(sk, ack, acked); 3602 |
|
3577 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3578 struct dst_entry *dst = __sk_dst_get(sk); 3579 if (dst) 3580 dst_confirm(dst); 3581 } 3582 3583 if (icsk->icsk_pending == ICSK_TIME_RETRANS) 3584 tcp_schedule_loss_probe(sk); --- 357 unchanged lines hidden (view full) --- 3942 * If we are in FINWAIT-1, a received FIN indicates simultaneous 3943 * close and we go into CLOSING (and later onto TIME-WAIT) 3944 * 3945 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. 3946 */ 3947static void tcp_fin(struct sock *sk) 3948{ 3949 struct tcp_sock *tp = tcp_sk(sk); | 3603 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3604 struct dst_entry *dst = __sk_dst_get(sk); 3605 if (dst) 3606 dst_confirm(dst); 3607 } 3608 3609 if (icsk->icsk_pending == ICSK_TIME_RETRANS) 3610 tcp_schedule_loss_probe(sk); --- 357 unchanged lines hidden (view full) --- 3968 * If we are in FINWAIT-1, a received FIN indicates simultaneous 3969 * close and we go into CLOSING (and later onto TIME-WAIT) 3970 * 3971 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. 3972 */ 3973static void tcp_fin(struct sock *sk) 3974{ 3975 struct tcp_sock *tp = tcp_sk(sk); |
3950 const struct dst_entry *dst; | |
3951 3952 inet_csk_schedule_ack(sk); 3953 3954 sk->sk_shutdown |= RCV_SHUTDOWN; 3955 sock_set_flag(sk, SOCK_DONE); 3956 3957 switch (sk->sk_state) { 3958 case TCP_SYN_RECV: 3959 case TCP_ESTABLISHED: 3960 /* Move to CLOSE_WAIT */ 3961 tcp_set_state(sk, TCP_CLOSE_WAIT); | 3976 3977 inet_csk_schedule_ack(sk); 3978 3979 sk->sk_shutdown |= RCV_SHUTDOWN; 3980 sock_set_flag(sk, SOCK_DONE); 3981 3982 switch (sk->sk_state) { 3983 case TCP_SYN_RECV: 3984 case TCP_ESTABLISHED: 3985 /* Move to CLOSE_WAIT */ 3986 tcp_set_state(sk, TCP_CLOSE_WAIT); |
3962 dst = __sk_dst_get(sk); 3963 if (!dst || !dst_metric(dst, RTAX_QUICKACK)) 3964 inet_csk(sk)->icsk_ack.pingpong = 1; | 3987 inet_csk(sk)->icsk_ack.pingpong = 1; |
3965 break; 3966 3967 case TCP_CLOSE_WAIT: 3968 case TCP_CLOSING: 3969 /* Received a retransmission of the FIN, do 3970 * nothing. 3971 */ 3972 break; --- 2002 unchanged lines hidden (view full) --- 5975 const struct sk_buff *skb, 5976 const struct sock *listen_sk, 5977 const struct dst_entry *dst) 5978{ 5979 const struct tcphdr *th = tcp_hdr(skb); 5980 const struct net *net = sock_net(listen_sk); 5981 bool th_ecn = th->ece && th->cwr; 5982 bool ect, ecn_ok; | 3988 break; 3989 3990 case TCP_CLOSE_WAIT: 3991 case TCP_CLOSING: 3992 /* Received a retransmission of the FIN, do 3993 * nothing. 3994 */ 3995 break; --- 2002 unchanged lines hidden (view full) --- 5998 const struct sk_buff *skb, 5999 const struct sock *listen_sk, 6000 const struct dst_entry *dst) 6001{ 6002 const struct tcphdr *th = tcp_hdr(skb); 6003 const struct net *net = sock_net(listen_sk); 6004 bool th_ecn = th->ece && th->cwr; 6005 bool ect, ecn_ok; |
6006 u32 ecn_ok_dst; |
|
5983 5984 if (!th_ecn) 5985 return; 5986 5987 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); | 6007 6008 if (!th_ecn) 6009 return; 6010 6011 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); |
5988 ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); | 6012 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); 6013 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; |
5989 | 6014 |
5990 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) | 6015 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || 6016 (ecn_ok_dst & DST_FEATURE_ECN_CA)) |
5991 inet_rsk(req)->ecn_ok = 1; 5992} 5993 5994static void tcp_openreq_init(struct request_sock *req, 5995 const struct tcp_options_received *rx_opt, 5996 struct sk_buff *skb, const struct sock *sk) 5997{ 5998 struct inet_request_sock *ireq = inet_rsk(req); --- 231 unchanged lines hidden --- | 6017 inet_rsk(req)->ecn_ok = 1; 6018} 6019 6020static void tcp_openreq_init(struct request_sock *req, 6021 const struct tcp_options_received *rx_opt, 6022 struct sk_buff *skb, const struct sock *sk) 6023{ 6024 struct inet_request_sock *ireq = inet_rsk(req); --- 231 unchanged lines hidden --- |
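The last hunk splits the old single test in two: ecn_ok_dst caches the route's ECN features, and ECN is then accepted on the passive open either when ECN is allowed (global sysctl or per-route feature) and the SYN was not ECT-marked, or when the route pins an ECN-requiring congestion control (the DST_FEATURE_ECN_CA bit). A hedged sketch of that decision; the helper name and the feature-bit values below are placeholders, not the kernel's definitions:

```c
#include <stdbool.h>

/* Placeholder feature bits; the real DST_FEATURE_* values are defined in
 * include/net/dst.h of this tree and are not reproduced here. */
#define ROUTE_ECN_ALLOWED 0x1   /* stands in for RTAX_FEATURE_ECN   */
#define ROUTE_ECN_CA      0x2   /* stands in for DST_FEATURE_ECN_CA */

/* Hypothetical helper mirroring the decision in tcp_ecn_create_request(). */
static bool req_ecn_ok(bool sysctl_ecn, unsigned int route_ecn_feats,
                       bool syn_was_ect, bool ca_needs_ecn)
{
        bool ecn_ok = sysctl_ecn || route_ecn_feats;

        return (!syn_was_ect && ecn_ok) || ca_needs_ecn ||
               (route_ecn_feats & ROUTE_ECN_CA);
}
```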