tcp_bbr.c (30aa69e7bd9f7af3574120249eecb3726dcaf737) -> tcp_bbr.c (76a9ebe811fb3d0605cb084f1ae6be5610541865)
1/* Bottleneck Bandwidth and RTT (BBR) congestion control
2 *
3 * BBR congestion control computes the sending rate based on the delivery
4 * rate (throughput) estimated from ACKs. In a nutshell:
5 *
6 * On each ACK, update our model of the network path:
7 * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
8 * min_rtt = windowed_min(rtt, 10 seconds)

--- 114 unchanged lines hidden (view full) ---
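The two windowed filters sketched above drive everything else: the bw filter is a max over the last 10 round trips (bbr_bw_rtts = CYCLE_LEN + 2 below, where CYCLE_LEN is the 8-phase gain cycle defined in the hidden lines) and the min_rtt filter is a min over the last 10 seconds (bbr_min_rtt_win_sec). As a minimal standalone sketch of such a filter — not the kernel's actual lib/win_minmax.c, which tracks the best three samples so the estimate degrades gracefully when the best one ages out:

#include <stdint.h>

/* Simplified windowed-max filter: remember the best sample and let it
 * expire once it is older than the window. A windowed min is the same
 * with the comparison flipped.
 */
struct windowed_max {
	uint64_t best;		/* highest sample seen in the window */
	uint64_t best_time;	/* when that sample was taken */
};

static uint64_t windowed_max_update(struct windowed_max *w, uint64_t now,
				    uint64_t win, uint64_t sample)
{
	if (sample >= w->best || now - w->best_time > win) {
		w->best = sample;
		w->best_time = now;
	}
	return w->best;
}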

123static const int bbr_bw_rtts = CYCLE_LEN + 2;
124/* Window length of min_rtt filter (in sec): */
125static const u32 bbr_min_rtt_win_sec = 10;
126/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
127static const u32 bbr_probe_rtt_mode_ms = 200;
128/* Skip TSO below the following bandwidth (bits/sec): */
129static const int bbr_min_tso_rate = 1200000;
130
131/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */
132static const int bbr_pacing_marging_percent = 1;
133
134/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
135 * that will allow a smoothly increasing pacing rate that will double each RTT
136 * and send the same number of packets per RTT that an un-paced, slow-starting
137 * Reno or CUBIC flow would:
138 */
139static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1;
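As a quick sanity check of this fixed-point encoding (a standalone sketch; BBR_SCALE and BBR_UNIT are defined in the hidden lines above as 8 and 1 << 8):

#include <math.h>
#include <stdio.h>

#define BBR_SCALE 8			/* as in the hidden lines above */
#define BBR_UNIT (1 << BBR_SCALE)

int main(void)
{
	int high_gain = BBR_UNIT * 2885 / 1000 + 1;	/* = 739 */

	/* 739/256 = 2.8867..., just above 2/ln(2) = 2.8854...; the +1
	 * guarantees the truncated fixed-point value does not fall below
	 * the target gain.
	 */
	printf("high_gain = %d (%.4f, target 2/ln(2) = %.4f)\n",
	       high_gain, (double)high_gain / BBR_UNIT, 2.0 / log(2.0));
	return 0;
}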
140/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
141 * the queue created in BBR_STARTUP in a single round:

--- 64 unchanged lines hidden (view full) ---

206/* Return rate in bytes per second, optionally with a gain.
207 * The order here is chosen carefully to avoid overflow of u64. This should
208 * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
209 */
210static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
211{
212 unsigned int mss = tcp_sk(sk)->mss_cache;
213
211 if (!tcp_needs_internal_pacing(sk))
212 mss = tcp_mss_to_mtu(sk, mss);
213 rate *= mss;
214 rate *= gain;
215 rate >>= BBR_SCALE;
216 rate *= USEC_PER_SEC;
214 rate *= mss;
215 rate *= gain;
216 rate >>= BBR_SCALE;
217 rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_marging_percent);
218 return rate >> BW_SCALE;
219}
220
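The overflow bound in the comment above can be checked numerically. A sketch under stated assumptions — bw arrives in BBR's unit of (packets/uSec) << BW_SCALE with BW_SCALE = 24 (from the hidden lines), an MSS of 1500 bytes, and the new version's 1% pacing margin:

#include <stdint.h>
#include <stdio.h>

#define BW_SCALE 24			/* as in the hidden lines above */
#define BBR_SCALE 8

int main(void)
{
	/* 2.9 Tbit/sec with a 1500-byte MSS, in (pkts/uSec) << BW_SCALE: */
	double pkts_per_usec = 2.9e12 / 8 / 1500 / 1e6;	/* ~241.7 */
	uint64_t rate = (uint64_t)(pkts_per_usec * (1 << BW_SCALE));
	uint64_t gain = 739;			/* bbr_high_gain, ~2.89x */

	rate *= 1500;				/* rate *= mss */
	rate *= gain;				/* rate *= gain */
	rate >>= BBR_SCALE;
	rate *= 1000000 / 100 * (100 - 1);	/* the USEC_PER_SEC step */

	/* Peak intermediate ~1.74e19 stays below U64_MAX ~1.84e19; doing
	 * the USEC_PER_SEC multiply before the >> BBR_SCALE shift would
	 * instead peak near 4.4e21 and wrap. That is the "order chosen
	 * carefully" in the comment.
	 */
	printf("peak %llu of max %llu\n",
	       (unsigned long long)rate, (unsigned long long)UINT64_MAX);
	printf("pacing rate: %llu bytes/sec\n",
	       (unsigned long long)(rate >> BW_SCALE));
	return 0;
}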
221/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
221static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
222static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
223{
224 u64 rate = bw;
225
226 rate = bbr_rate_bytes_per_sec(sk, rate, gain);
227 rate = min_t(u64, rate, sk->sk_max_pacing_rate);
228 return rate;
229}
230

--- 22 unchanged lines hidden (view full) ---

253 * estimated bandwidth. This is an important aspect of the design. In this
254 * implementation this slightly lower pacing rate is achieved implicitly by not
255 * including link-layer headers in the packet size used for the pacing rate.
256 */
257static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
258{
259 struct tcp_sock *tp = tcp_sk(sk);
260 struct bbr *bbr = inet_csk_ca(sk);
260 u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
261 unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain);
262
263 if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
264 bbr_init_pacing_rate_from_rtt(sk);
265 if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
266 sk->sk_pacing_rate = rate;
267}
268
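The u32 -> unsigned long change in the declaration above (and in bbr_bw_to_pacing_rate earlier) is consistent with sk_pacing_rate itself being widened in the newer commit: a u32 rate in bytes per second caps out near 34 Gbit/sec, within reach of modern NICs. A quick check of that ceiling:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A u32 pacing rate saturates at 2^32 - 1 bytes/sec: */
	printf("u32 ceiling: %.1f Gbit/sec\n",
	       (uint64_t)UINT32_MAX * 8 / 1e9);	/* ~34.4 Gbit/sec */
	return 0;
}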
269/* override sysctl_tcp_min_tso_segs */

--- 5 unchanged lines hidden (view full) ---

275static u32 bbr_tso_segs_goal(struct sock *sk)
276{
277 struct tcp_sock *tp = tcp_sk(sk);
278 u32 segs, bytes;
279
280 /* Sort of tcp_tso_autosize() but ignoring
281 * driver provided sk_gso_max_size.
282 */
282 bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
283 bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
284 GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
285 segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
286
287 return min(segs, 0x7FU);
288}
289
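To make the sizing above concrete, a worked example under stated assumptions — sk_pacing_shift at its kernel default of 10, GSO_MAX_SIZE = 65536, MAX_TCP_HEADER taken as 320 (its real value is config dependent), and bbr_min_tso_segs() stubbed to 2:

#include <stdint.h>
#include <stdio.h>

#define GSO_MAX_SIZE	65536u
#define MAX_TCP_HEADER	320u		/* assumed; config dependent */

int main(void)
{
	uint64_t pacing_rate = 125000000;	/* 1 Gbit/sec in bytes/sec */
	unsigned int pacing_shift = 10;		/* kernel default */
	unsigned int mss = 1448;
	unsigned int min_segs = 2;	/* stand-in for bbr_min_tso_segs() */

	/* Budget ~1/1024 sec of data at the pacing rate, capped below
	 * the largest GSO packet that could be built:
	 */
	uint64_t bytes = pacing_rate >> pacing_shift;	/* 122070 */
	if (bytes > GSO_MAX_SIZE - 1 - MAX_TCP_HEADER)
		bytes = GSO_MAX_SIZE - 1 - MAX_TCP_HEADER;	/* 65215 */

	unsigned int segs = bytes / mss;	/* 65215 / 1448 = 45 */
	if (segs < min_segs)
		segs = min_segs;
	if (segs > 0x7F)			/* same cap as min(segs, 0x7FU) */
		segs = 0x7F;
	printf("TSO goal at 1 Gbit/sec: %u segs\n", segs);
	return 0;
}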
290/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
291static void bbr_save_cwnd(struct sock *sk)

--- 691 unchanged lines hidden ---