tcp_input.c (old: 650c919798c71fb34b77a6f2ba03a06907f06a76) vs. tcp_input.c (new: c3a8d9474684d391b0afc3970d9b249add15ec07)
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Authors: Ross Biro

--- 95 unchanged lines hidden ---

104
105#define FLAG_DATA 0x01 /* Incoming frame contained data. */
106#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
107#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
108#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
109#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
110#define FLAG_DATA_SACKED 0x20 /* New SACK. */
111#define FLAG_ECE 0x40 /* ECE in this ACK */
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Authors: Ross Biro

--- 95 unchanged lines hidden ---

104
105#define FLAG_DATA 0x01 /* Incoming frame contained data. */
106#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
107#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
108#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
109#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
110#define FLAG_DATA_SACKED 0x20 /* New SACK. */
111#define FLAG_ECE 0x40 /* ECE in this ACK */
112#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
112#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
113#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
114#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
115#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
116#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
117#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
118
119#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)

--- 71 unchanged lines hidden ---

191 icsk->icsk_ack.pingpong = 0;
192 icsk->icsk_ack.ato = TCP_ATO_MIN;
193}
194
195/* Send ACKs quickly, if "quick" count is not exhausted
196 * and the session is not interactive.
197 */
198
113#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
114#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
115#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
116#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
117#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
118#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
119
120#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)

--- 71 unchanged lines hidden ---

192 icsk->icsk_ack.pingpong = 0;
193 icsk->icsk_ack.ato = TCP_ATO_MIN;
194}
195
196/* Send ACKs quickly, if "quick" count is not exhausted
197 * and the session is not interactive.
198 */
199
199static inline bool tcp_in_quickack_mode(const struct sock *sk)
200static bool tcp_in_quickack_mode(struct sock *sk)
200{
201 const struct inet_connection_sock *icsk = inet_csk(sk);
201{
202 const struct inet_connection_sock *icsk = inet_csk(sk);
203 const struct dst_entry *dst = __sk_dst_get(sk);
202
204
203 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
205 return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
206 (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
204}
205
206static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
207{
208 if (tp->ecn_flags & TCP_ECN_OK)
209 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
210}
211

--- 533 unchanged lines hidden ---

745}
746
747/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
748 * Note: TCP stack does not yet implement pacing.
749 * FQ packet scheduler can be used to implement cheap but effective
750 * TCP pacing, to smooth the burst on large writes when packets
751 * in flight is significantly lower than cwnd (or rwin)
752 */
207}
208
209static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
210{
211 if (tp->ecn_flags & TCP_ECN_OK)
212 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
213}
214

--- 533 unchanged lines hidden ---

748}
749
750/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
751 * Note: TCP stack does not yet implement pacing.
752 * FQ packet scheduler can be used to implement cheap but effective
753 * TCP pacing, to smooth the burst on large writes when packets
754 * in flight is significantly lower than cwnd (or rwin)
755 */
756int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
757int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
758
753static void tcp_update_pacing_rate(struct sock *sk)
754{
755 const struct tcp_sock *tp = tcp_sk(sk);
756 u64 rate;
757
758 /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
759static void tcp_update_pacing_rate(struct sock *sk)
760{
761 const struct tcp_sock *tp = tcp_sk(sk);
762 u64 rate;
763
764 /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
759 rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
765 rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
760
766
767 /* current rate is (cwnd * mss) / srtt
768 * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
769 * In Congestion Avoidance phase, set it to 120 % the current rate.
770 *
771 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
772 * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
773 * end of slow start and should slow down.
774 */
775 if (tp->snd_cwnd < tp->snd_ssthresh / 2)
776 rate *= sysctl_tcp_pacing_ss_ratio;
777 else
778 rate *= sysctl_tcp_pacing_ca_ratio;
779
761 rate *= max(tp->snd_cwnd, tp->packets_out);
762
763 if (likely(tp->srtt_us))
764 do_div(rate, tp->srtt_us);
765
766 /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
767 * without any lock. We want to make sure compiler won't store
768 * intermediate values in this location.

--- 263 unchanged lines hidden ---

1032 * Event "B". Later note: FACK people cheated me again 8), we have to account
1033 * for reordering! Ugly, but should help.
1034 *
1035 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
1036 * less than what is now known to be received by the other end (derived from
1037 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
1038 * retransmitted skbs to avoid some costly processing per ACKs.
1039 */
780 rate *= max(tp->snd_cwnd, tp->packets_out);
781
782 if (likely(tp->srtt_us))
783 do_div(rate, tp->srtt_us);
784
785 /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
786 * without any lock. We want to make sure compiler won't store
787 * intermediate values in this location.

--- 263 unchanged lines hidden ---
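
A worked example may help with the arithmetic in tcp_update_pacing_rate() above. The constant ((USEC_PER_SEC / 100) << 3) folds in both the percent scaling of the new ss/ca ratios (defaults 200 and 120, presumably exposed as net.ipv4.tcp_pacing_ss_ratio and net.ipv4.tcp_pacing_ca_ratio) and the <<3 scaling carried by srtt_us. The following standalone userspace sketch uses invented mss/cwnd/srtt values and is an illustration, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000UL

int main(void)
{
	/* invented sample values, not taken from a real connection */
	uint64_t mss_cache = 1448;		/* bytes per segment */
	uint64_t snd_cwnd  = 10;		/* packets in the congestion window */
	uint64_t srtt_us   = 50000 << 3;	/* 50 ms smoothed RTT, stored <<3 as in the kernel */
	uint64_t ss_ratio  = 200;		/* percent, slow-start default */

	/* mss * 8 * 10000: the /100 absorbs the percent ratio applied below,
	 * the <<3 cancels the <<3 carried by srtt_us */
	uint64_t rate = mss_cache * ((USEC_PER_SEC / 100) << 3);

	rate *= ss_ratio;			/* 200 % of the current rate in slow start */
	rate *= snd_cwnd;
	rate /= srtt_us;

	/* current rate = mss * cwnd / srtt = 1448 * 10 / 0.05 s = 289600 B/s,
	 * so the pacing rate comes out as 579200 bytes/sec */
	printf("sk_pacing_rate ~= %llu bytes/sec\n", (unsigned long long)rate);
	return 0;
}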

1051 * Event "B". Later note: FACK people cheated me again 8), we have to account
1052 * for reordering! Ugly, but should help.
1053 *
1054 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
1055 * less than what is now known to be received by the other end (derived from
1056 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
1057 * retransmitted skbs to avoid some costly processing per ACKs.
1058 */
1040static void tcp_mark_lost_retrans(struct sock *sk)
1059static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
1041{
1042 const struct inet_connection_sock *icsk = inet_csk(sk);
1043 struct tcp_sock *tp = tcp_sk(sk);
1044 struct sk_buff *skb;
1045 int cnt = 0;
1046 u32 new_low_seq = tp->snd_nxt;
1047 u32 received_upto = tcp_highest_sack_seq(tp);
1048

--- 24 unchanged lines hidden ---

1073 * Whether FACK should check here for tp->reordering segs
1074 * in-between one could argue for either way (it would be
1075 * rather simple to implement as we could count fack_count
1076 * during the walk and do tp->fackets_out - fack_count).
1077 */
1078 if (after(received_upto, ack_seq)) {
1079 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1080 tp->retrans_out -= tcp_skb_pcount(skb);
1060{
1061 const struct inet_connection_sock *icsk = inet_csk(sk);
1062 struct tcp_sock *tp = tcp_sk(sk);
1063 struct sk_buff *skb;
1064 int cnt = 0;
1065 u32 new_low_seq = tp->snd_nxt;
1066 u32 received_upto = tcp_highest_sack_seq(tp);
1067

--- 24 unchanged lines hidden ---

1092 * Whether FACK should check here for tp->reordering segs
1093 * in-between one could argue for either way (it would be
1094 * rather simple to implement as we could count fack_count
1095 * during the walk and do tp->fackets_out - fack_count).
1096 */
1097 if (after(received_upto, ack_seq)) {
1098 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1099 tp->retrans_out -= tcp_skb_pcount(skb);
1081
1100 *flag |= FLAG_LOST_RETRANS;
1082 tcp_skb_mark_lost_uncond_verify(tp, skb);
1083 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1084 } else {
1085 if (before(ack_seq, new_low_seq))
1086 new_low_seq = ack_seq;
1087 cnt += tcp_skb_pcount(skb);
1088 }
1089 }

--- 723 unchanged lines hidden ---

1813 }
1814 for (j = 0; j < used_sacks; j++)
1815 tp->recv_sack_cache[i++] = sp[j];
1816
1817 if ((state->reord < tp->fackets_out) &&
1818 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1819 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1820
1101 tcp_skb_mark_lost_uncond_verify(tp, skb);
1102 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1103 } else {
1104 if (before(ack_seq, new_low_seq))
1105 new_low_seq = ack_seq;
1106 cnt += tcp_skb_pcount(skb);
1107 }
1108 }
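
The lost-retransmission heuristic above is easier to follow with concrete sequence numbers. In this minimal sketch (numbers invented, not kernel code), ack_seq stands for the snd_nxt value recorded when the skb was retransmitted, as described in the comment before tcp_mark_lost_retrans(); the after() macro is reproduced here from the kernel's serial-number comparisons so the snippet compiles on its own:

#include <stdint.h>
#include <stdio.h>

/* "is seq2 after seq1?", wrap-safe serial-number compare as in include/net/tcp.h */
#define after(seq2, seq1)	((int32_t)((seq1) - (seq2)) < 0)

int main(void)
{
	uint32_t ack_seq = 1000;	/* snd_nxt when this skb was retransmitted */
	uint32_t received_upto = 1500;	/* highest sequence covered by SACK blocks now */

	if (after(received_upto, ack_seq))
		/* the receiver already holds data sent after the retransmission,
		 * yet this skb is still unacked: the retransmission itself is
		 * presumed lost, and the new code additionally sets FLAG_LOST_RETRANS */
		printf("retransmission presumed lost\n");
	else
		printf("not enough evidence yet\n");
	return 0;
}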

--- 723 unchanged lines hidden ---

1832 }
1833 for (j = 0; j < used_sacks; j++)
1834 tp->recv_sack_cache[i++] = sp[j];
1835
1836 if ((state->reord < tp->fackets_out) &&
1837 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1838 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1839
1821 tcp_mark_lost_retrans(sk);
1840 tcp_mark_lost_retrans(sk, &state->flag);
1822 tcp_verify_left_out(tp);
1823out:
1824
1825#if FASTRETRANS_DEBUG > 0
1826 WARN_ON((int)tp->sacked_out < 0);
1827 WARN_ON((int)tp->lost_out < 0);
1828 WARN_ON((int)tp->retrans_out < 0);
1829 WARN_ON((int)tcp_packets_in_flight(tp) < 0);

--- 639 unchanged lines hidden ---

2469 inet_csk(sk)->icsk_retransmits = 0;
2470 if (frto_undo || tcp_is_sack(tp))
2471 tcp_set_ca_state(sk, TCP_CA_Open);
2472 return true;
2473 }
2474 return false;
2475}
2476
1841 tcp_verify_left_out(tp);
1842out:
1843
1844#if FASTRETRANS_DEBUG > 0
1845 WARN_ON((int)tp->sacked_out < 0);
1846 WARN_ON((int)tp->lost_out < 0);
1847 WARN_ON((int)tp->retrans_out < 0);
1848 WARN_ON((int)tcp_packets_in_flight(tp) < 0);

--- 639 unchanged lines hidden ---

2488 inet_csk(sk)->icsk_retransmits = 0;
2489 if (frto_undo || tcp_is_sack(tp))
2490 tcp_set_ca_state(sk, TCP_CA_Open);
2491 return true;
2492 }
2493 return false;
2494}
2495
2477/* The cwnd reduction in CWR and Recovery use the PRR algorithm
2478 * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
2496/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
2479 * It computes the number of packets to send (sndcnt) based on packets newly
2480 * delivered:
2481 * 1) If the packets in flight is larger than ssthresh, PRR spreads the
2482 * cwnd reductions across a full RTT.
2497 * It computes the number of packets to send (sndcnt) based on packets newly
2498 * delivered:
2499 * 1) If the packets in flight is larger than ssthresh, PRR spreads the
2500 * cwnd reductions across a full RTT.
2483 * 2) If packets in flight is lower than ssthresh (such as due to excess
2484 * losses and/or application stalls), do not perform any further cwnd
2485 * reductions, but instead slow start up to ssthresh.
2501 * 2) Otherwise PRR uses packet conservation to send as much as delivered.
2502 * But when the retransmits are acked without further losses, PRR
2503 * slow starts cwnd up to ssthresh to speed up the recovery.
2486 */
2487static void tcp_init_cwnd_reduction(struct sock *sk)
2488{
2489 struct tcp_sock *tp = tcp_sk(sk);
2490
2491 tp->high_seq = tp->snd_nxt;
2492 tp->tlp_high_seq = 0;
2493 tp->snd_cwnd_cnt = 0;
2494 tp->prior_cwnd = tp->snd_cwnd;
2495 tp->prr_delivered = 0;
2496 tp->prr_out = 0;
2497 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2498 tcp_ecn_queue_cwr(tp);
2499}
2500
2501static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2504 */
2505static void tcp_init_cwnd_reduction(struct sock *sk)
2506{
2507 struct tcp_sock *tp = tcp_sk(sk);
2508
2509 tp->high_seq = tp->snd_nxt;
2510 tp->tlp_high_seq = 0;
2511 tp->snd_cwnd_cnt = 0;
2512 tp->prior_cwnd = tp->snd_cwnd;
2513 tp->prr_delivered = 0;
2514 tp->prr_out = 0;
2515 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2516 tcp_ecn_queue_cwr(tp);
2517}
2518
2519static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2502 int fast_rexmit)
2520 int fast_rexmit, int flag)
2503{
2504 struct tcp_sock *tp = tcp_sk(sk);
2505 int sndcnt = 0;
2506 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2507 int newly_acked_sacked = prior_unsacked -
2508 (tp->packets_out - tp->sacked_out);
2509
2510 tp->prr_delivered += newly_acked_sacked;
2521{
2522 struct tcp_sock *tp = tcp_sk(sk);
2523 int sndcnt = 0;
2524 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2525 int newly_acked_sacked = prior_unsacked -
2526 (tp->packets_out - tp->sacked_out);
2527
2528 tp->prr_delivered += newly_acked_sacked;
2511 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2529 if (delta < 0) {
2512 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2513 tp->prior_cwnd - 1;
2514 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2530 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2531 tp->prior_cwnd - 1;
2532 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2515 } else {
2533 } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
2534 !(flag & FLAG_LOST_RETRANS)) {
2516 sndcnt = min_t(int, delta,
2517 max_t(int, tp->prr_delivered - tp->prr_out,
2518 newly_acked_sacked) + 1);
2535 sndcnt = min_t(int, delta,
2536 max_t(int, tp->prr_delivered - tp->prr_out,
2537 newly_acked_sacked) + 1);
2538 } else {
2539 sndcnt = min(delta, newly_acked_sacked);
2519 }
2540 }
2520
2521 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2522 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2523}
2524
2525static inline void tcp_end_cwnd_reduction(struct sock *sk)
2526{
2527 struct tcp_sock *tp = tcp_sk(sk);
2528

--- 44 unchanged lines hidden ---

2573 tp->retrans_stamp = 0;
2574
2575 if (flag & FLAG_ECE)
2576 tcp_enter_cwr(sk);
2577
2578 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2579 tcp_try_keep_open(sk);
2580 } else {
2541 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2542 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2543}
2544
2545static inline void tcp_end_cwnd_reduction(struct sock *sk)
2546{
2547 struct tcp_sock *tp = tcp_sk(sk);
2548
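
To make the PRR description above concrete, here is a standalone sketch of the sndcnt computation for the common branch where packets in flight still exceed ssthresh (delta < 0 in the new code). All numbers are invented and the kernel helpers (div_u64, tcp_packets_in_flight) are replaced with plain C; this is an illustration, not kernel code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* invented snapshot of a recovery episode */
	uint64_t prior_cwnd    = 20;	/* cwnd when the reduction started */
	uint64_t snd_ssthresh  = 10;	/* e.g. prior_cwnd / 2 */
	uint64_t prr_delivered = 4;	/* packets delivered to the receiver so far */
	uint64_t prr_out       = 1;	/* packets (re)transmitted so far */
	uint64_t in_flight     = 16;	/* tcp_packets_in_flight() */

	if (in_flight > snd_ssthresh) {
		/* proportional reduction: spread the cwnd cut over one RTT */
		uint64_t dividend = snd_ssthresh * prr_delivered + prior_cwnd - 1;
		int64_t sndcnt = (int64_t)(dividend / prior_cwnd) - (int64_t)prr_out;

		/* (10 * 4 + 19) / 20 - 1 = 1: roughly ssthresh/prior_cwnd of the
		 * newly delivered packets may be sent out this round */
		printf("sndcnt = %lld, new cwnd = %llu\n",
		       (long long)sndcnt,
		       (unsigned long long)(in_flight + sndcnt));
	}
	return 0;
}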

--- 44 unchanged lines hidden ---

2593 tp->retrans_stamp = 0;
2594
2595 if (flag & FLAG_ECE)
2596 tcp_enter_cwr(sk);
2597
2598 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2599 tcp_try_keep_open(sk);
2600 } else {
2581 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2601 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
2582 }
2583}
2584
2585static void tcp_mtup_probe_failed(struct sock *sk)
2586{
2587 struct inet_connection_sock *icsk = inet_csk(sk);
2588
2589 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2590 icsk->icsk_mtup.probe_size = 0;
2602 }
2603}
2604
2605static void tcp_mtup_probe_failed(struct sock *sk)
2606{
2607 struct inet_connection_sock *icsk = inet_csk(sk);
2608
2609 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2610 icsk->icsk_mtup.probe_size = 0;
2611 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2591}
2592
2593static void tcp_mtup_probe_success(struct sock *sk)
2594{
2595 struct tcp_sock *tp = tcp_sk(sk);
2596 struct inet_connection_sock *icsk = inet_csk(sk);
2597
2598 /* FIXME: breaks with very large cwnd */
2599 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2600 tp->snd_cwnd = tp->snd_cwnd *
2601 tcp_mss_to_mtu(sk, tp->mss_cache) /
2602 icsk->icsk_mtup.probe_size;
2603 tp->snd_cwnd_cnt = 0;
2604 tp->snd_cwnd_stamp = tcp_time_stamp;
2605 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2606
2607 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2608 icsk->icsk_mtup.probe_size = 0;
2609 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2612}
2613
2614static void tcp_mtup_probe_success(struct sock *sk)
2615{
2616 struct tcp_sock *tp = tcp_sk(sk);
2617 struct inet_connection_sock *icsk = inet_csk(sk);
2618
2619 /* FIXME: breaks with very large cwnd */
2620 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2621 tp->snd_cwnd = tp->snd_cwnd *
2622 tcp_mss_to_mtu(sk, tp->mss_cache) /
2623 icsk->icsk_mtup.probe_size;
2624 tp->snd_cwnd_cnt = 0;
2625 tp->snd_cwnd_stamp = tcp_time_stamp;
2626 tp->snd_ssthresh = tcp_current_ssthresh(sk);
2627
2628 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2629 icsk->icsk_mtup.probe_size = 0;
2630 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2631 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2610}
2611
2612/* Do a simple retransmit without using the backoff mechanisms in
2613 * tcp_timer. This is used for path mtu discovery.
2614 * The socket is already locked here.
2615 */
2616void tcp_simple_retransmit(struct sock *sk)
2617{

--- 52 unchanged lines hidden ---

2670 else
2671 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2672
2673 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2674
2675 tp->prior_ssthresh = 0;
2676 tcp_init_undo(tp);
2677
2632}
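
A small worked example of the cwnd rescaling in tcp_mtup_probe_success() above: cwnd counts packets, so when a probe succeeds and segments get bigger, the packet count is shrunk proportionally to keep the byte window roughly unchanged (invented numbers, standalone sketch rather than kernel code; overflow for very large cwnd is the "FIXME" noted in the function):

#include <stdio.h>

int main(void)
{
	/* invented values: segments sized for a 1500-byte path MTU so far,
	 * and a 1700-byte probe has just been acked */
	unsigned int snd_cwnd   = 34;	/* packets */
	unsigned int cur_mtu    = 1500;	/* tcp_mss_to_mtu(sk, tp->mss_cache) */
	unsigned int probe_size = 1700;	/* icsk->icsk_mtup.probe_size */

	/* same proportional scaling as the kernel: 34 * 1500 / 1700 = 30 packets,
	 * i.e. still roughly 51000 bytes worth of window */
	snd_cwnd = snd_cwnd * cur_mtu / probe_size;

	printf("new snd_cwnd = %u packets\n", snd_cwnd);
	return 0;
}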
2633
2634/* Do a simple retransmit without using the backoff mechanisms in
2635 * tcp_timer. This is used for path mtu discovery.
2636 * The socket is already locked here.
2637 */
2638void tcp_simple_retransmit(struct sock *sk)
2639{

--- 52 unchanged lines hidden ---

2692 else
2693 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2694
2695 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2696
2697 tp->prior_ssthresh = 0;
2698 tcp_init_undo(tp);
2699
2678 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2700 if (!tcp_in_cwnd_reduction(sk)) {
2679 if (!ece_ack)
2680 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2681 tcp_init_cwnd_reduction(sk);
2682 }
2683 tcp_set_ca_state(sk, TCP_CA_Recovery);
2684}
2685
2686/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are

--- 43 unchanged lines hidden ---

2730 else if (flag & FLAG_SND_UNA_ADVANCED)
2731 tcp_reset_reno_sack(tp);
2732 }
2733 tcp_xmit_retransmit_queue(sk);
2734}
2735
2736/* Undo during fast recovery after partial ACK. */
2737static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2701 if (!ece_ack)
2702 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2703 tcp_init_cwnd_reduction(sk);
2704 }
2705 tcp_set_ca_state(sk, TCP_CA_Recovery);
2706}
2707
2708/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are

--- 43 unchanged lines hidden ---

2752 else if (flag & FLAG_SND_UNA_ADVANCED)
2753 tcp_reset_reno_sack(tp);
2754 }
2755 tcp_xmit_retransmit_queue(sk);
2756}
2757
2758/* Undo during fast recovery after partial ACK. */
2759static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2738 const int prior_unsacked)
2760 const int prior_unsacked, int flag)
2739{
2740 struct tcp_sock *tp = tcp_sk(sk);
2741
2742 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2743 /* Plain luck! Hole is filled with delayed
2744 * packet, rather than with a retransmit.
2745 */
2746 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2747
2748 /* We are getting evidence that the reordering degree is higher
2749 * than we realized. If there are no retransmits out then we
2750 * can undo. Otherwise we clock out new packets but do not
2751 * mark more packets lost or retransmit more.
2752 */
2753 if (tp->retrans_out) {
2761{
2762 struct tcp_sock *tp = tcp_sk(sk);
2763
2764 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2765 /* Plain luck! Hole is filled with delayed
2766 * packet, rather than with a retransmit.
2767 */
2768 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2769
2770 /* We are getting evidence that the reordering degree is higher
2771 * than we realized. If there are no retransmits out then we
2772 * can undo. Otherwise we clock out new packets but do not
2773 * mark more packets lost or retransmit more.
2774 */
2775 if (tp->retrans_out) {
2754 tcp_cwnd_reduction(sk, prior_unsacked, 0);
2776 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
2755 return true;
2756 }
2757
2758 if (!tcp_any_retrans_done(sk))
2759 tp->retrans_stamp = 0;
2760
2761 DBGUNDO(sk, "partial recovery");
2762 tcp_undo_cwnd_reduction(sk, true);

--- 70 unchanged lines hidden ---

2833
2834 /* E. Process state. */
2835 switch (icsk->icsk_ca_state) {
2836 case TCP_CA_Recovery:
2837 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2838 if (tcp_is_reno(tp) && is_dupack)
2839 tcp_add_reno_sack(sk);
2840 } else {
2777 return true;
2778 }
2779
2780 if (!tcp_any_retrans_done(sk))
2781 tp->retrans_stamp = 0;
2782
2783 DBGUNDO(sk, "partial recovery");
2784 tcp_undo_cwnd_reduction(sk, true);

--- 70 unchanged lines hidden ---

2855
2856 /* E. Process state. */
2857 switch (icsk->icsk_ca_state) {
2858 case TCP_CA_Recovery:
2859 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
2860 if (tcp_is_reno(tp) && is_dupack)
2861 tcp_add_reno_sack(sk);
2862 } else {
2841 if (tcp_try_undo_partial(sk, acked, prior_unsacked))
2863 if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
2842 return;
2843 /* Partial ACK arrived. Force fast retransmit. */
2844 do_lost = tcp_is_reno(tp) ||
2845 tcp_fackets_out(tp) > tp->reordering;
2846 }
2847 if (tcp_try_undo_dsack(sk)) {
2848 tcp_try_keep_open(sk);
2849 return;
2850 }
2851 break;
2852 case TCP_CA_Loss:
2853 tcp_process_loss(sk, flag, is_dupack);
2864 return;
2865 /* Partial ACK arrived. Force fast retransmit. */
2866 do_lost = tcp_is_reno(tp) ||
2867 tcp_fackets_out(tp) > tp->reordering;
2868 }
2869 if (tcp_try_undo_dsack(sk)) {
2870 tcp_try_keep_open(sk);
2871 return;
2872 }
2873 break;
2874 case TCP_CA_Loss:
2875 tcp_process_loss(sk, flag, is_dupack);
2854 if (icsk->icsk_ca_state != TCP_CA_Open)
2876 if (icsk->icsk_ca_state != TCP_CA_Open &&
2877 !(flag & FLAG_LOST_RETRANS))
2855 return;
2878 return;
2856 /* Fall through to processing in Open state. */
2879 /* Change state if cwnd is undone or retransmits are lost */
2857 default:
2858 if (tcp_is_reno(tp)) {
2859 if (flag & FLAG_SND_UNA_ADVANCED)
2860 tcp_reset_reno_sack(tp);
2861 if (is_dupack)
2862 tcp_add_reno_sack(sk);
2863 }
2864

--- 18 unchanged lines hidden ---

2883
2884 /* Otherwise enter Recovery state */
2885 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2886 fast_rexmit = 1;
2887 }
2888
2889 if (do_lost)
2890 tcp_update_scoreboard(sk, fast_rexmit);
2880 default:
2881 if (tcp_is_reno(tp)) {
2882 if (flag & FLAG_SND_UNA_ADVANCED)
2883 tcp_reset_reno_sack(tp);
2884 if (is_dupack)
2885 tcp_add_reno_sack(sk);
2886 }
2887

--- 18 unchanged lines hidden ---

2906
2907 /* Otherwise enter Recovery state */
2908 tcp_enter_recovery(sk, (flag & FLAG_ECE));
2909 fast_rexmit = 1;
2910 }
2911
2912 if (do_lost)
2913 tcp_update_scoreboard(sk, fast_rexmit);
2891 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
2914 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
2892 tcp_xmit_retransmit_queue(sk);
2893}
2894
2895static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2896 long seq_rtt_us, long sack_rtt_us)
2897{
2898 const struct tcp_sock *tp = tcp_sk(sk);
2899

--- 420 unchanged lines hidden ---

3320 tp->snd_wnd = nwin;
3321
3322 /* Note, it is the only place, where
3323 * fast path is recovered for sending TCP.
3324 */
3325 tp->pred_flags = 0;
3326 tcp_fast_path_check(sk);
3327
2915 tcp_xmit_retransmit_queue(sk);
2916}
2917
2918static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2919 long seq_rtt_us, long sack_rtt_us)
2920{
2921 const struct tcp_sock *tp = tcp_sk(sk);
2922

--- 420 unchanged lines hidden ---

3343 tp->snd_wnd = nwin;
3344
3345 /* Note, it is the only place, where
3346 * fast path is recovered for sending TCP.
3347 */
3348 tp->pred_flags = 0;
3349 tcp_fast_path_check(sk);
3350
3351 if (tcp_send_head(sk))
3352 tcp_slow_start_after_idle_check(sk);
3353
3328 if (nwin > tp->max_window) {
3329 tp->max_window = nwin;
3330 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3331 }
3332 }
3333 }
3334
3335 tcp_snd_una_update(tp, ack);

--- 221 unchanged lines hidden ---

3557 goto no_queue;
3558
3559 /* See if we can take anything off of the retransmit queue. */
3560 acked = tp->packets_out;
3561 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3562 &sack_state);
3563 acked -= tp->packets_out;
3564
3354 if (nwin > tp->max_window) {
3355 tp->max_window = nwin;
3356 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
3357 }
3358 }
3359 }
3360
3361 tcp_snd_una_update(tp, ack);

--- 221 unchanged lines hidden ---

3583 goto no_queue;
3584
3585 /* See if we can take anything off of the retransmit queue. */
3586 acked = tp->packets_out;
3587 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3588 &sack_state);
3589 acked -= tp->packets_out;
3590
3565 /* Advance cwnd if state allows */
3566 if (tcp_may_raise_cwnd(sk, flag))
3567 tcp_cong_avoid(sk, ack, acked);
3568
3569 if (tcp_ack_is_dubious(sk, flag)) {
3570 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3571 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3572 is_dupack, flag);
3573 }
3574 if (tp->tlp_high_seq)
3575 tcp_process_tlp_ack(sk, ack, flag);
3576
3591 if (tcp_ack_is_dubious(sk, flag)) {
3592 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3593 tcp_fastretrans_alert(sk, acked, prior_unsacked,
3594 is_dupack, flag);
3595 }
3596 if (tp->tlp_high_seq)
3597 tcp_process_tlp_ack(sk, ack, flag);
3598
3599 /* Advance cwnd if state allows */
3600 if (tcp_may_raise_cwnd(sk, flag))
3601 tcp_cong_avoid(sk, ack, acked);
3602
3577 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3578 struct dst_entry *dst = __sk_dst_get(sk);
3579 if (dst)
3580 dst_confirm(dst);
3581 }
3582
3583 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3584 tcp_schedule_loss_probe(sk);

--- 357 unchanged lines hidden ---

3942 * If we are in FINWAIT-1, a received FIN indicates simultaneous
3943 * close and we go into CLOSING (and later onto TIME-WAIT)
3944 *
3945 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3946 */
3947static void tcp_fin(struct sock *sk)
3948{
3949 struct tcp_sock *tp = tcp_sk(sk);
3603 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3604 struct dst_entry *dst = __sk_dst_get(sk);
3605 if (dst)
3606 dst_confirm(dst);
3607 }
3608
3609 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3610 tcp_schedule_loss_probe(sk);

--- 357 unchanged lines hidden ---

3968 * If we are in FINWAIT-1, a received FIN indicates simultaneous
3969 * close and we go into CLOSING (and later onto TIME-WAIT)
3970 *
3971 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3972 */
3973static void tcp_fin(struct sock *sk)
3974{
3975 struct tcp_sock *tp = tcp_sk(sk);
3950 const struct dst_entry *dst;
3951
3952 inet_csk_schedule_ack(sk);
3953
3954 sk->sk_shutdown |= RCV_SHUTDOWN;
3955 sock_set_flag(sk, SOCK_DONE);
3956
3957 switch (sk->sk_state) {
3958 case TCP_SYN_RECV:
3959 case TCP_ESTABLISHED:
3960 /* Move to CLOSE_WAIT */
3961 tcp_set_state(sk, TCP_CLOSE_WAIT);
3976
3977 inet_csk_schedule_ack(sk);
3978
3979 sk->sk_shutdown |= RCV_SHUTDOWN;
3980 sock_set_flag(sk, SOCK_DONE);
3981
3982 switch (sk->sk_state) {
3983 case TCP_SYN_RECV:
3984 case TCP_ESTABLISHED:
3985 /* Move to CLOSE_WAIT */
3986 tcp_set_state(sk, TCP_CLOSE_WAIT);
3962 dst = __sk_dst_get(sk);
3963 if (!dst || !dst_metric(dst, RTAX_QUICKACK))
3964 inet_csk(sk)->icsk_ack.pingpong = 1;
3987 inet_csk(sk)->icsk_ack.pingpong = 1;
3965 break;
3966
3967 case TCP_CLOSE_WAIT:
3968 case TCP_CLOSING:
3969 /* Received a retransmission of the FIN, do
3970 * nothing.
3971 */
3972 break;

--- 2002 unchanged lines hidden ---

5975 const struct sk_buff *skb,
5976 const struct sock *listen_sk,
5977 const struct dst_entry *dst)
5978{
5979 const struct tcphdr *th = tcp_hdr(skb);
5980 const struct net *net = sock_net(listen_sk);
5981 bool th_ecn = th->ece && th->cwr;
5982 bool ect, ecn_ok;
3988 break;
3989
3990 case TCP_CLOSE_WAIT:
3991 case TCP_CLOSING:
3992 /* Received a retransmission of the FIN, do
3993 * nothing.
3994 */
3995 break;

--- 2002 unchanged lines hidden ---

5998 const struct sk_buff *skb,
5999 const struct sock *listen_sk,
6000 const struct dst_entry *dst)
6001{
6002 const struct tcphdr *th = tcp_hdr(skb);
6003 const struct net *net = sock_net(listen_sk);
6004 bool th_ecn = th->ece && th->cwr;
6005 bool ect, ecn_ok;
6006 u32 ecn_ok_dst;
5983
5984 if (!th_ecn)
5985 return;
5986
5987 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6007
6008 if (!th_ecn)
6009 return;
6010
6011 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
5988 ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
6012 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6013 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
5989
6014
5990 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
6015 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6016 (ecn_ok_dst & DST_FEATURE_ECN_CA))
5991 inet_rsk(req)->ecn_ok = 1;
5992}
5993
5994static void tcp_openreq_init(struct request_sock *req,
5995 const struct tcp_options_received *rx_opt,
5996 struct sk_buff *skb, const struct sock *sk)
5997{
5998 struct inet_request_sock *ireq = inet_rsk(req);

--- 231 unchanged lines hidden ---
6017 inet_rsk(req)->ecn_ok = 1;
6018}
6019
6020static void tcp_openreq_init(struct request_sock *req,
6021 const struct tcp_options_received *rx_opt,
6022 struct sk_buff *skb, const struct sock *sk)
6023{
6024 struct inet_request_sock *ireq = inet_rsk(req);

--- 231 unchanged lines hidden ---