tcp_output.c — diff between commit bba73071b6f71be0a101658d7c13866e30b264a6 (old version) and commit dcb8c9b4373a583451b1b8a3e916d33de273633d (new version)
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Authors: Ross Biro

--- 1192 unchanged lines hidden (view full) ---

1201 tcp_add_write_queue_tail(sk, skb);
1202 sk->sk_wmem_queued += skb->truesize;
1203 sk_mem_charge(sk, skb->truesize);
1204}
1205
1206/* Initialize TSO segments for a packet. */
1207static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1208{
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Authors: Ross Biro

--- 1192 unchanged lines hidden (view full) ---

1201 tcp_add_write_queue_tail(sk, skb);
1202 sk->sk_wmem_queued += skb->truesize;
1203 sk_mem_charge(sk, skb->truesize);
1204}
1205
1206/* Initialize TSO segments for a packet. */
1207static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1208{
1209 if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
1209 if (skb->len <= mss_now) {
1210 /* Avoid the costly divide in the normal
1211 * non-TSO case.
1212 */
1213 tcp_skb_pcount_set(skb, 1);
1214 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1215 } else {
1216 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1217 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;

--- 112 unchanged lines hidden (view full) ---

1330
1331 /* PSH and FIN should only be set in the second packet. */
1332 flags = TCP_SKB_CB(skb)->tcp_flags;
1333 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1334 TCP_SKB_CB(buff)->tcp_flags = flags;
1335 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1336 tcp_skb_fragment_eor(skb, buff);
1337
1210 /* Avoid the costly divide in the normal
1211 * non-TSO case.
1212 */
1213 tcp_skb_pcount_set(skb, 1);
1214 TCP_SKB_CB(skb)->tcp_gso_size = 0;
1215 } else {
1216 tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
1217 TCP_SKB_CB(skb)->tcp_gso_size = mss_now;

--- 112 unchanged lines hidden (view full) ---

1330
1331 /* PSH and FIN should only be set in the second packet. */
1332 flags = TCP_SKB_CB(skb)->tcp_flags;
1333 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1334 TCP_SKB_CB(buff)->tcp_flags = flags;
1335 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1336 tcp_skb_fragment_eor(skb, buff);
1337
1338 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1339 /* Copy and checksum data tail into the new buffer. */
1340 buff->csum = csum_partial_copy_nocheck(skb->data + len,
1341 skb_put(buff, nsize),
1342 nsize, 0);
1338 skb_split(skb, buff, len);
1343
1339
1344 skb_trim(skb, len);
1340 buff->ip_summed = CHECKSUM_PARTIAL;
1345
1341
1346 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
1347 } else {
1348 skb->ip_summed = CHECKSUM_PARTIAL;
1349 skb_split(skb, buff, len);
1350 }
1351
1352 buff->ip_summed = skb->ip_summed;
1353
1354 buff->tstamp = skb->tstamp;
1355 tcp_fragment_tstamp(skb, buff);
1356
1357 old_factor = tcp_skb_pcount(skb);
1358
1359 /* Fix up tso_factor for both original and new SKB. */
1360 tcp_set_skb_tso_segs(skb, mss_now);
1361 tcp_set_skb_tso_segs(buff, mss_now);

--- 348 unchanged lines hidden (view full) ---

1710 return partial &&
1711 ((nonagle & TCP_NAGLE_CORK) ||
1712 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1713}
1714
1715/* Return how many segs we'd like on a TSO packet,
1716 * to send one TSO packet per ms
1717 */
1342 buff->tstamp = skb->tstamp;
1343 tcp_fragment_tstamp(skb, buff);
1344
1345 old_factor = tcp_skb_pcount(skb);
1346
1347 /* Fix up tso_factor for both original and new SKB. */
1348 tcp_set_skb_tso_segs(skb, mss_now);
1349 tcp_set_skb_tso_segs(buff, mss_now);

--- 348 unchanged lines hidden (view full) ---

1698 return partial &&
1699 ((nonagle & TCP_NAGLE_CORK) ||
1700 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1701}
1702
1703/* Return how many segs we'd like on a TSO packet,
1704 * to send one TSO packet per ms
1705 */
/*
 * NOTE(review): this span contains BOTH versions of tcp_tso_autosize from the
 * diff view, interleaved.  Old version (viewer lines 1718-1735): non-static,
 * exported via EXPORT_SYMBOL, and clamps its result to sk->sk_gso_max_segs
 * itself.  New version (viewer lines 1706-1722): made static, EXPORT_SYMBOL
 * dropped, and it returns the unclamped segment count — the sk_gso_max_segs
 * clamp is visible later in this diff inside the new tcp_tso_segs()
 * (viewer line 1737).  Both versions compute a byte budget from the pacing
 * rate and divide by mss_now, with min_tso_segs as the floor.
 */
/* --- old version --- */
1718u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1719 int min_tso_segs)
/* --- new version (now static; external linkage removed) --- */
1706static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1707 int min_tso_segs)
/* --- old version body --- */
1720{
1721 u32 bytes, segs;
1722
1723 bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
1724 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1725
1726 /* Goal is to send at least one packet per ms,
1727 * not one big TSO packet every 100 ms.
1728 * This preserves ACK clocking and is consistent
1729 * with tcp_tso_should_defer() heuristic.
1730 */
1731 segs = max_t(u32, bytes / mss_now, min_tso_segs);
1732
/* --- new version body (identical computation) --- */
1708{
1709 u32 bytes, segs;
1710
1711 bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
1712 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1713
1714 /* Goal is to send at least one packet per ms,
1715 * not one big TSO packet every 100 ms.
1716 * This preserves ACK clocking and is consistent
1717 * with tcp_tso_should_defer() heuristic.
1718 */
1719 segs = max_t(u32, bytes / mss_now, min_tso_segs);
1720
/* old: clamp to device GSO segment limit here */
1733 return min_t(u32, segs, sk->sk_gso_max_segs);
/* new: return unclamped; caller (tcp_tso_segs) applies the clamp */
1721 return segs;
1734}
1722}
/* old version only: symbol was exported; removed when made static */
1735EXPORT_SYMBOL(tcp_tso_autosize);
1736
1737/* Return the number of segments we want in the skb we are transmitting.
1738 * See if congestion control module wants to decide; otherwise, autosize.
1739 */
1740static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1741{
1742 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1723
1724/* Return the number of segments we want in the skb we are transmitting.
1725 * See if congestion control module wants to decide; otherwise, autosize.
1726 */
1727static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1728{
1729 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1743 u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
1730 u32 min_tso, tso_segs;
1744
1731
1745 return tso_segs ? :
1746 tcp_tso_autosize(sk, mss_now,
1747 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
1732 min_tso = ca_ops->min_tso_segs ?
1733 ca_ops->min_tso_segs(sk) :
1734 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
1735
1736 tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
1737 return min_t(u32, tso_segs, sk->sk_gso_max_segs);
1748}
1749
1750/* Returns the portion of skb which can be sent right away */
1751static unsigned int tcp_mss_split_point(const struct sock *sk,
1752 const struct sk_buff *skb,
1753 unsigned int mss_now,
1754 unsigned int max_segs,
1755 int nonagle)

--- 140 unchanged lines hidden (view full) ---

1896 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1897 TCP_SKB_CB(buff)->tcp_flags = flags;
1898
1899 /* This packet was never sent out yet, so no SACK bits. */
1900 TCP_SKB_CB(buff)->sacked = 0;
1901
1902 tcp_skb_fragment_eor(skb, buff);
1903
1738}
1739
1740/* Returns the portion of skb which can be sent right away */
1741static unsigned int tcp_mss_split_point(const struct sock *sk,
1742 const struct sk_buff *skb,
1743 unsigned int mss_now,
1744 unsigned int max_segs,
1745 int nonagle)

--- 140 unchanged lines hidden (view full) ---

1886 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1887 TCP_SKB_CB(buff)->tcp_flags = flags;
1888
1889 /* This packet was never sent out yet, so no SACK bits. */
1890 TCP_SKB_CB(buff)->sacked = 0;
1891
1892 tcp_skb_fragment_eor(skb, buff);
1893
1904 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1894 buff->ip_summed = CHECKSUM_PARTIAL;
1905 skb_split(skb, buff, len);
1906 tcp_fragment_tstamp(skb, buff);
1907
1908 /* Fix up tso_factor for both original and new SKB. */
1909 tcp_set_skb_tso_segs(skb, mss_now);
1910 tcp_set_skb_tso_segs(buff, mss_now);
1911
1912 /* Link BUFF into the send queue. */

--- 109 unchanged lines hidden (view full) ---

2022 icsk->icsk_af_ops->net_header_len;
2023 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
2024
2025 /* Update probe time stamp */
2026 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
2027 }
2028}
2029
1895 skb_split(skb, buff, len);
1896 tcp_fragment_tstamp(skb, buff);
1897
1898 /* Fix up tso_factor for both original and new SKB. */
1899 tcp_set_skb_tso_segs(skb, mss_now);
1900 tcp_set_skb_tso_segs(buff, mss_now);
1901
1902 /* Link BUFF into the send queue. */

--- 109 unchanged lines hidden (view full) ---

2012 icsk->icsk_af_ops->net_header_len;
2013 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
2014
2015 /* Update probe time stamp */
2016 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
2017 }
2018}
2019
/*
 * New helper introduced by this diff.  Returns true iff the first @len bytes
 * at the head of the send queue can be merged into a single skb: it walks
 * skbs from tcp_send_head() accumulating their lengths, and returns false if
 * it hits an skb with the EOR (end-of-record) bit set before @len bytes are
 * covered — coalescing across an EOR boundary would merge distinct records.
 * Visible caller in this diff: tcp_mtu_probe(), with @len = probe_size
 * (viewer line 2110), to bail out (-1) before building a probe skb.
 *
 * NOTE(review): tcp_for_write_queue_from_safe() is a project macro not shown
 * here — presumably it iterates from @skb to the end of the write queue with
 * @next as the deletion-safe cursor; confirm against include/net/tcp.h.
 * If the queue is exhausted before @len is covered, the loop simply ends and
 * the function returns true.
 */
2020static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
2021{
2022 struct sk_buff *skb, *next;
2023
2024 skb = tcp_send_head(sk);
2025 tcp_for_write_queue_from_safe(skb, next, sk) {
/* remaining @len fits inside this skb: coalescing is possible */
2026 if (len <= skb->len)
2027 break;
2028
/* record boundary before @len bytes gathered: must not merge past it */
2029 if (unlikely(TCP_SKB_CB(skb)->eor))
2030 return false;
2031
2032 len -= skb->len;
2033 }
2034
2035 return true;
2036}
2037
2030/* Create a new MTU probe if we are ready.
2031 * MTU probe is regularly attempting to increase the path MTU by
2032 * deliberately sending larger packets. This discovers routing
2033 * changes resulting in larger path MTUs.
2034 *
2035 * Returns 0 if we should wait to probe (no cwnd available),
2036 * 1 if a probe was sent,
2037 * -1 otherwise

--- 56 unchanged lines hidden (view full) ---

2094 /* Do we need to wait to drain cwnd? With none in flight, don't stall */
2095 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
2096 if (!tcp_packets_in_flight(tp))
2097 return -1;
2098 else
2099 return 0;
2100 }
2101
2038/* Create a new MTU probe if we are ready.
2039 * MTU probe is regularly attempting to increase the path MTU by
2040 * deliberately sending larger packets. This discovers routing
2041 * changes resulting in larger path MTUs.
2042 *
2043 * Returns 0 if we should wait to probe (no cwnd available),
2044 * 1 if a probe was sent,
2045 * -1 otherwise

--- 56 unchanged lines hidden (view full) ---

2102 /* Do we need to wait to drain cwnd? With none in flight, don't stall */
2103 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
2104 if (!tcp_packets_in_flight(tp))
2105 return -1;
2106 else
2107 return 0;
2108 }
2109
2110 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
2111 return -1;
2112
2102 /* We're allowed to probe. Build it now. */
2103 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
2104 if (!nskb)
2105 return -1;
2106 sk->sk_wmem_queued += nskb->truesize;
2107 sk_mem_charge(sk, nskb->truesize);
2108
2109 skb = tcp_send_head(sk);
2110
2111 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
2112 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
2113 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
2114 TCP_SKB_CB(nskb)->sacked = 0;
2115 nskb->csum = 0;
2113 /* We're allowed to probe. Build it now. */
2114 nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
2115 if (!nskb)
2116 return -1;
2117 sk->sk_wmem_queued += nskb->truesize;
2118 sk_mem_charge(sk, nskb->truesize);
2119
2120 skb = tcp_send_head(sk);
2121
2122 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
2123 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
2124 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
2125 TCP_SKB_CB(nskb)->sacked = 0;
2126 nskb->csum = 0;
2116 nskb->ip_summed = skb->ip_summed;
2127 nskb->ip_summed = CHECKSUM_PARTIAL;
2117
2118 tcp_insert_write_queue_before(nskb, skb, sk);
2119 tcp_highest_sack_replace(sk, skb, nskb);
2120
2121 len = 0;
2122 tcp_for_write_queue_from_safe(skb, next, sk) {
2123 copy = min_t(int, skb->len, probe_size - len);
2128
2129 tcp_insert_write_queue_before(nskb, skb, sk);
2130 tcp_highest_sack_replace(sk, skb, nskb);
2131
2132 len = 0;
2133 tcp_for_write_queue_from_safe(skb, next, sk) {
2134 copy = min_t(int, skb->len, probe_size - len);
2124 if (nskb->ip_summed) {
2125 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
2126 } else {
2127 __wsum csum = skb_copy_and_csum_bits(skb, 0,
2128 skb_put(nskb, copy),
2129 copy, 0);
2130 nskb->csum = csum_block_add(nskb->csum, csum, len);
2131 }
2135 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
2132
2133 if (skb->len <= copy) {
2134 /* We've eaten all the data from this skb.
2135 * Throw it away. */
2136 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
2136
2137 if (skb->len <= copy) {
2138 /* We've eaten all the data from this skb.
2139 * Throw it away. */
2140 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
2141 /* If this is the last SKB we copy and eor is set
2142 * we need to propagate it to the new skb.
2143 */
2144 TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
2137 tcp_unlink_write_queue(skb, sk);
2138 sk_wmem_free_skb(sk, skb);
2139 } else {
2140 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
2141 ~(TCPHDR_FIN|TCPHDR_PSH);
2142 if (!skb_shinfo(skb)->nr_frags) {
2143 skb_pull(skb, copy);
2145 tcp_unlink_write_queue(skb, sk);
2146 sk_wmem_free_skb(sk, skb);
2147 } else {
2148 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
2149 ~(TCPHDR_FIN|TCPHDR_PSH);
2150 if (!skb_shinfo(skb)->nr_frags) {
2151 skb_pull(skb, copy);
2144 if (skb->ip_summed != CHECKSUM_PARTIAL)
2145 skb->csum = csum_partial(skb->data,
2146 skb->len, 0);
2147 } else {
2148 __pskb_trim_head(skb, copy);
2149 tcp_set_skb_tso_segs(skb, mss_now);
2150 }
2151 TCP_SKB_CB(skb)->seq += copy;
2152 }
2153
2154 len += copy;

--- 561 unchanged lines hidden (view full) ---

2716 if (next_skb_size <= skb_availroom(skb))
2717 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2718 next_skb_size);
2719 else if (!skb_shift(skb, next_skb, next_skb_size))
2720 return false;
2721 }
2722 tcp_highest_sack_replace(sk, next_skb, skb);
2723
2152 } else {
2153 __pskb_trim_head(skb, copy);
2154 tcp_set_skb_tso_segs(skb, mss_now);
2155 }
2156 TCP_SKB_CB(skb)->seq += copy;
2157 }
2158
2159 len += copy;

--- 561 unchanged lines hidden (view full) ---

2721 if (next_skb_size <= skb_availroom(skb))
2722 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2723 next_skb_size);
2724 else if (!skb_shift(skb, next_skb, next_skb_size))
2725 return false;
2726 }
2727 tcp_highest_sack_replace(sk, next_skb, skb);
2728
2724 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2725 skb->ip_summed = CHECKSUM_PARTIAL;
2726
2727 if (skb->ip_summed != CHECKSUM_PARTIAL)
2728 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
2729
2730 /* Update sequence range on original skb. */
2731 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2732
2733 /* Merge over control information. This moves PSH/FIN etc. over */
2734 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2735
2736 /* All done, get rid of second SKB and account for it so
2737 * packet counting does not break.

--- 1031 unchanged lines hidden ---
2729 /* Update sequence range on original skb. */
2730 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2731
2732 /* Merge over control information. This moves PSH/FIN etc. over */
2733 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
2734
2735 /* All done, get rid of second SKB and account for it so
2736 * packet counting does not break.

--- 1031 unchanged lines hidden ---