tcp_output.c: bba73071b6f71be0a101658d7c13866e30b264a6 → dcb8c9b4373a583451b1b8a3e916d33de273633d
```diff
--- tcp_output.c (bba73071b6f71be0a101658d7c13866e30b264a6)
+++ tcp_output.c (dcb8c9b4373a583451b1b8a3e916d33de273633d)
@@ -1,8 +1,8 @@
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system. INET is implemented using the BSD Socket
  *		interface as the means of communication with the user level.
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
  * Authors:	Ross Biro
```
--- 1192 unchanged lines hidden (view full) ---
```diff
@@ -1201,17 +1201,17 @@
 	tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
 }
 
 /* Initialize TSO segments for a packet. */
 static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
-	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+	if (skb->len <= mss_now) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
 		tcp_skb_pcount_set(skb, 1);
 		TCP_SKB_CB(skb)->tcp_gso_size = 0;
 	} else {
 		tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
 		TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
```
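The fast path above skips the divide when the payload fits in one MSS; otherwise the segment count is a ceiling division of payload length by MSS. A minimal userspace sketch of that arithmetic, assuming the standard kernel definition of `DIV_ROUND_UP` (the helper name `tso_pcount` is illustrative):

```c
#include <stdio.h>

/* Assumed to match the kernel's DIV_ROUND_UP from kernel.h. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Toy model of tcp_set_skb_tso_segs(): how many MSS-sized
 * segments a payload of 'len' bytes becomes. */
static unsigned int tso_pcount(unsigned int len, unsigned int mss)
{
	if (len <= mss)
		return 1;	/* fast path: no divide */
	return DIV_ROUND_UP(len, mss);
}

int main(void)
{
	printf("%u\n", tso_pcount(1000, 1460));	/* 1 */
	printf("%u\n", tso_pcount(4380, 1460));	/* 3 */
	printf("%u\n", tso_pcount(4381, 1460));	/* 4 */
	return 0;
}
```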
--- 112 unchanged lines hidden (view full) ---
```diff
@@ -1330,32 +1330,20 @@
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->tcp_flags;
 	TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->tcp_flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	tcp_skb_fragment_eor(skb, buff);
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Copy and checksum data tail into the new buffer. */
-		buff->csum = csum_partial_copy_nocheck(skb->data + len,
-						       skb_put(buff, nsize),
-						       nsize, 0);
-
-		skb_trim(skb, len);
-
-		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb_split(skb, buff, len);
-	}
-
-	buff->ip_summed = skb->ip_summed;
+	skb_split(skb, buff, len);
 
-	buff->ip_summed = skb->ip_summed;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 
 	buff->tstamp = skb->tstamp;
 	tcp_fragment_tstamp(skb, buff);
 
 	old_factor = tcp_skb_pcount(skb);
 
 	/* Fix up tso_factor for both original and new SKB. */
 	tcp_set_skb_tso_segs(skb, mss_now);
 	tcp_set_skb_tso_segs(buff, mss_now);
```
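Both before and after this change, `tcp_fragment()` keeps PSH and FIN off the head fragment: those flags describe the end of the written data, so they belong on the tail. A toy sketch of just that flag bookkeeping, using the real `TCPHDR_*` bit values but hypothetical helper names:

```c
#include <stdio.h>

#define TCPHDR_FIN 0x01
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10

/* Toy model of the flag handling in tcp_fragment(): the head
 * fragment keeps everything except PSH/FIN, which move to the
 * second (tail) fragment. */
static void split_flags(unsigned char *head, unsigned char *tail)
{
	unsigned char flags = *head;

	*head = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
	*tail = flags;
}

int main(void)
{
	unsigned char head = TCPHDR_ACK | TCPHDR_PSH | TCPHDR_FIN, tail;

	split_flags(&head, &tail);
	printf("head=0x%02x tail=0x%02x\n", head, tail);	/* head=0x10 tail=0x19 */
	return 0;
}
```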
--- 348 unchanged lines hidden (view full) ---
```diff
@@ -1710,46 +1698,48 @@
 	return partial &&
 		((nonagle & TCP_NAGLE_CORK) ||
 		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }
 
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+			    int min_tso_segs)
 {
 	u32 bytes, segs;
 
 	bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
 
 	/* Goal is to send at least one packet per ms,
 	 * not one big TSO packet every 100 ms.
 	 * This preserves ACK clocking and is consistent
 	 * with tcp_tso_should_defer() heuristic.
 	 */
 	segs = max_t(u32, bytes / mss_now, min_tso_segs);
 
-	return min_t(u32, segs, sk->sk_gso_max_segs);
+	return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
  */
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+	u32 min_tso, tso_segs;
 
-	return tso_segs ? :
-		tcp_tso_autosize(sk, mss_now,
-				 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+	min_tso = ca_ops->min_tso_segs ?
+			ca_ops->min_tso_segs(sk) :
+			sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }
 
 /* Returns the portion of skb which can be sent right away */
 static unsigned int tcp_mss_split_point(const struct sock *sk,
 					const struct sk_buff *skb,
 					unsigned int mss_now,
 					unsigned int max_segs,
 					int nonagle)
```
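The two hunks above move the `sk_gso_max_segs` clamp out of `tcp_tso_autosize()` and into `tcp_tso_segs()`, and let the congestion-control module supply only the floor (`min_tso_segs`) rather than the whole answer. A userspace sketch of the resulting sizing pipeline; the pacing shift of 10 (roughly 1 ms worth of bytes at the pacing rate) and the `MAX_TCP_HEADER` value are assumptions for illustration:

```c
#include <stdint.h>
#include <stdio.h>

#define MAX_TCP_HEADER 320	/* illustrative; the kernel value is config-dependent */

/* Toy model of tcp_tso_autosize(): size a TSO burst from the pacing
 * rate so that roughly one packet goes out per ms. */
static uint32_t tso_autosize(uint64_t pacing_rate, int pacing_shift,
			     uint32_t gso_max_size, uint32_t mss_now,
			     uint32_t min_tso_segs)
{
	uint64_t bytes = pacing_rate >> pacing_shift;	/* ~1 ms of data at shift 10 */

	if (bytes > gso_max_size - 1 - MAX_TCP_HEADER)
		bytes = gso_max_size - 1 - MAX_TCP_HEADER;

	uint32_t segs = bytes / mss_now;

	return segs > min_tso_segs ? segs : min_tso_segs;
}

/* Toy model of tcp_tso_segs(): the caller now applies the device limit. */
static uint32_t tso_segs(uint64_t pacing_rate, uint32_t mss_now,
			 uint32_t min_tso, uint32_t gso_max_segs)
{
	uint32_t segs = tso_autosize(pacing_rate, 10, 65536, mss_now, min_tso);

	return segs < gso_max_segs ? segs : gso_max_segs;
}

int main(void)
{
	/* 12.5 MB/s (100 Mbit) pacing, 1460-byte MSS: ~8 segs per burst. */
	printf("%u\n", tso_segs(12500000, 1460, 2, 64));
	return 0;
}
```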
--- 140 unchanged lines hidden (view full) ---
```diff
@@ -1896,17 +1886,17 @@
 	TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->tcp_flags = flags;
 
 	/* This packet was never sent out yet, so no SACK bits. */
 	TCP_SKB_CB(buff)->sacked = 0;
 
 	tcp_skb_fragment_eor(skb, buff);
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 	tcp_fragment_tstamp(skb, buff);
 
 	/* Fix up tso_factor for both original and new SKB. */
 	tcp_set_skb_tso_segs(skb, mss_now);
 	tcp_set_skb_tso_segs(buff, mss_now);
 
 	/* Link BUFF into the send queue. */
```
--- 109 unchanged lines hidden (view full) ---
```diff
@@ -2022,16 +2012,34 @@
 			icsk->icsk_af_ops->net_header_len;
 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 
 		/* Update probe time stamp */
 		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
 	}
 }
 
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+	struct sk_buff *skb, *next;
+
+	skb = tcp_send_head(sk);
+	tcp_for_write_queue_from_safe(skb, next, sk) {
+		if (len <= skb->len)
+			break;
+
+		if (unlikely(TCP_SKB_CB(skb)->eor))
+			return false;
+
+		len -= skb->len;
+	}
+
+	return true;
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets. This discovers routing
  * changes resulting in larger path MTUs.
  *
  * Returns 0 if we should wait to probe (no cwnd available),
  * 1 if a probe was sent,
  * -1 otherwise
```
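`tcp_mtu_probe()` builds a probe by coalescing data from the head of the send queue; merging across an skb marked EOR (end of record) would destroy the record boundary, so the new helper above vetoes probing in that case. A standalone sketch of the same walk over a toy queue (struct and function names are illustrative, not the kernel's):

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the send queue: each node models one skb. */
struct toy_skb {
	int len;		/* payload bytes */
	bool eor;		/* end-of-record: must not merge past this */
	struct toy_skb *next;
};

/* Same idea as tcp_can_coalesce_send_queue_head(): can 'len' bytes
 * be taken from the head of the queue without consuming an skb whose
 * EOR mark sits inside the range we would merge? */
static bool can_coalesce_head(struct toy_skb *head, int len)
{
	for (struct toy_skb *skb = head; skb; skb = skb->next) {
		if (len <= skb->len)
			break;		/* probe ends inside this skb */
		if (skb->eor)
			return false;	/* would merge across a record */
		len -= skb->len;
	}
	return true;
}

int main(void)
{
	struct toy_skb c = { 1000, false, NULL };
	struct toy_skb b = { 500, true, &c };	/* record ends after b */
	struct toy_skb a = { 500, false, &b };

	printf("%d\n", can_coalesce_head(&a, 800));	/* 1: ends inside b */
	printf("%d\n", can_coalesce_head(&a, 1200));	/* 0: consumes b whole, crossing its EOR */
	return 0;
}
```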
--- 56 unchanged lines hidden (view full) ---
```diff
@@ -2094,61 +2102,58 @@
 	/* Do we need to wait to drain cwnd? With none in flight, don't stall */
 	if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
 		if (!tcp_packets_in_flight(tp))
 			return -1;
 		else
 			return 0;
 	}
 
+	if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
+		return -1;
+
 	/* We're allowed to probe. Build it now. */
 	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
 	sk->sk_wmem_queued += nskb->truesize;
 	sk_mem_charge(sk, nskb->truesize);
 
 	skb = tcp_send_head(sk);
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
 	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	nskb->ip_summed = skb->ip_summed;
+	nskb->ip_summed = CHECKSUM_PARTIAL;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
 	tcp_highest_sack_replace(sk, skb, nskb);
 
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
-		if (nskb->ip_summed) {
-			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-		} else {
-			__wsum csum = skb_copy_and_csum_bits(skb, 0,
-							     skb_put(nskb, copy),
-							     copy, 0);
-			nskb->csum = csum_block_add(nskb->csum, csum, len);
-		}
+		skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+			/* If this is the last SKB we copy and eor is set
+			 * we need to propagate it to the new skb.
+			 */
+			TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
 			tcp_unlink_write_queue(skb, sk);
 			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
 						       ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_PARTIAL)
-					skb->csum = csum_partial(skb->data,
-								 skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
 				tcp_set_skb_tso_segs(skb, mss_now);
 			}
 			TCP_SKB_CB(skb)->seq += copy;
 		}
 
 		len += copy;
```
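With every skb now CHECKSUM_PARTIAL, the probe fill loop reduces to plain `skb_copy_bits()`, and an EOR mark on a fully-consumed skb is propagated to the probe. A toy model of the loop's length accounting, with illustrative names and none of the kernel's skb machinery:

```c
#include <stdio.h>

struct toy_skb { int len; struct toy_skb *next; };

/* Toy model of tcp_mtu_probe()'s fill loop: pull 'probe_size' bytes
 * off the head of the queue, unlinking skbs consumed whole and
 * trimming the first partially-used one (like skb_pull() or
 * __pskb_trim_head()). Returns the number of bytes gathered. */
static int fill_probe(struct toy_skb **queue, int probe_size)
{
	int len = 0;

	while (*queue && len < probe_size) {
		struct toy_skb *skb = *queue;
		int copy = skb->len < probe_size - len ? skb->len
						       : probe_size - len;

		len += copy;
		if (skb->len <= copy) {
			*queue = skb->next;	/* eaten whole: unlink */
		} else {
			skb->len -= copy;	/* partially consumed: trim */
			break;
		}
	}
	return len;
}

int main(void)
{
	struct toy_skb c = { 900, NULL }, b = { 700, &c }, a = { 500, &b };
	struct toy_skb *q = &a;

	printf("%d\n", fill_probe(&q, 1500));	/* 1500: a and b eaten, c trimmed */
	printf("%d\n", q->len);			/* 600: tail left in c */
	return 0;
}
```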
--- 561 unchanged lines hidden (view full) ---
```diff
@@ -2716,22 +2721,16 @@
 		if (next_skb_size <= skb_availroom(skb))
 			skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
 				      next_skb_size);
 		else if (!skb_shift(skb, next_skb, next_skb_size))
 			return false;
 	}
 	tcp_highest_sack_replace(sk, next_skb, skb);
 
-	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_PARTIAL;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
 	/* Update sequence range on original skb. */
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
 	/* Merge over control information. This moves PSH/FIN etc. over */
 	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
 
 	/* All done, get rid of second SKB and account for it so
 	 * packet counting does not break.
```
--- 1031 unchanged lines hidden (view full) ---
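The lines deleted in the last hunk kept a software checksum valid when two skbs were merged; once everything is CHECKSUM_PARTIAL that folding is dead code. What `csum_block_add()` exploited is that the Internet checksum of concatenated data can be combined from per-block sums, since ones'-complement addition is associative. A minimal userspace demonstration, assuming even block lengths so no odd-offset byte swap is needed:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 32-bit accumulator version of the Internet (ones'-complement) sum,
 * folded to 16 bits at the end. */
static uint32_t csum_add(uint32_t sum, const uint8_t *buf, size_t len)
{
	for (size_t i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

int main(void)
{
	uint8_t a[] = "abcdef", b[] = "ghijklmn";	/* even lengths */
	uint8_t ab[14];

	memcpy(ab, a, 6);
	memcpy(ab + 6, b, 8);

	uint32_t whole = csum_add(0, ab, 14);
	/* Like csum_block_add(): fold the second block's sum into the
	 * first; valid directly because the offset (6) is even. */
	uint32_t combined = csum_add(csum_add(0, a, 6), b, 8);

	printf("%04x %04x\n", whole, combined);	/* identical */
	return 0;
}
```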