--- tcp_bbr.c (dcb8c9b4373a583451b1b8a3e916d33de273633d)
+++ tcp_bbr.c (71abf467bb630c7e2f4ef33267d98fb7d10d3ce9)
 /* Bottleneck Bandwidth and RTT (BBR) congestion control
  *
  * BBR congestion control computes the sending rate based on the delivery
  * rate (throughput) estimated from ACKs. In a nutshell:
  *
  * On each ACK, update our model of the network path:
  *   bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
  *   min_rtt = windowed_min(rtt, 10 seconds)

--- 83 unchanged lines hidden ---
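[Note] The two windowed filters named in the header comment are the core of BBR's path model: a max-filter over roughly the last 10 round trips estimates bottleneck bandwidth, and a min-filter over the last 10 seconds tracks propagation RTT. A minimal sketch of the max-filter idea follows; it is illustrative only (the kernel itself uses the O(1) windowed min/max tracker in lib/win_minmax.c, not a naive loop like this):

    /* Illustrative only: a naive windowed max-filter over the last
     * BW_WIN per-round delivery-rate samples. */
    #include <stdint.h>

    #define BW_WIN 10                     /* window: ~10 round trips */

    struct bw_filter {
            uint32_t sample[BW_WIN];      /* newest sample overwrites oldest */
            uint32_t rounds;              /* rounds observed so far */
    };

    /* Record this round's delivery rate, return the windowed max. */
    static uint32_t bw_filter_update(struct bw_filter *f, uint32_t rate)
    {
            uint32_t i, n, max = 0;

            f->sample[f->rounds++ % BW_WIN] = rate;
            n = f->rounds < BW_WIN ? f->rounds : BW_WIN;
            for (i = 0; i < n; i++)
                    if (f->sample[i] > max)
                            max = f->sample[i];
            return max;                   /* windowed_max over last n rounds */
    }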

@@ -92,20 +92,19 @@
 	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
 	u32	next_rtt_delivered; /* scb->tx.delivered at end of round */
 	u64	cycle_mstamp;	    /* time of this cycle phase start */
 	u32	mode:3,		     /* current bbr_mode in state machine */
 		prev_ca_state:3,     /* CA state on previous ACK */
 		packet_conservation:1,  /* use packet conservation? */
 		restore_cwnd:1,	     /* decided to revert cwnd to old value */
 		round_start:1,	     /* start of packet-timed tx->ack round? */
-		tso_segs_goal:7,     /* segments we want in each skb we send */
 		idle_restart:1,	     /* restarting after idle? */
 		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
-		unused:5,
+		unused:12,
 		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
 		lt_rtt_cnt:7,	     /* round trips in long-term interval */
 		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
 	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
 	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
 	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
 	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
 	u32	pacing_gain:10,	     /* current gain for setting pacing rate */

--- 150 unchanged lines hidden ---
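[Note] A quick consistency check on the hunk above: the bitfields on both sides still fill the containing u32 exactly. Removing the 7-bit tso_segs_goal is what lets unused widen from 5 to 12 bits (3+3+1+1+1+1+1+12+1+7+1 = 32). A standalone sketch of that accounting, with an illustrative struct name, not part of the patch:

    /* Not part of the patch: verify the post-patch widths pack into one
     * 32-bit word (true on common ABIs). */
    #include <assert.h>
    #include <stdint.h>

    struct bbr_flags_after {              /* illustrative name */
            uint32_t mode:3, prev_ca_state:3, packet_conservation:1,
                     restore_cwnd:1, round_start:1, idle_restart:1,
                     probe_rtt_round_done:1, unused:12,
                     lt_is_sampling:1, lt_rtt_cnt:7, lt_use_bw:1;
    };

    static_assert(sizeof(struct bbr_flags_after) == sizeof(uint32_t),
                  "bitfields should pack into a single u32");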

@@ -262,30 +261,29 @@
 }

 /* override sysctl_tcp_min_tso_segs */
 static u32 bbr_min_tso_segs(struct sock *sk)
 {
 	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }

-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct bbr *bbr = inet_csk_ca(sk);
 	u32 segs, bytes;

 	/* Sort of tcp_tso_autosize() but ignoring
 	 * driver provided sk_gso_max_size.
 	 */
 	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));

-	bbr->tso_segs_goal = min(segs, 0x7FU);
+	return min(segs, 0x7FU);
 }

 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
 static void bbr_save_cwnd(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);


--- 54 unchanged lines hidden ---
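[Note] In the hunk above, the renamed bbr_tso_segs_goal() computes the goal on demand instead of caching it in the 7-bit struct field: it budgets roughly one pacing interval's worth of bytes per skb (sk_pacing_rate >> sk_pacing_shift), floors the result at bbr_min_tso_segs() (1 or 2 segments depending on pacing rate), and clamps it to 0x7F. A standalone sketch of that arithmetic, with assumed inputs and stand-in constants:

    /* Stand-alone sketch of the segment-goal math; the GSO/header cap is a
     * stand-in and the inputs are assumed. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t tso_segs_goal(uint64_t pacing_rate,  /* bytes/sec */
                                  uint32_t pacing_shift, uint32_t mss,
                                  uint32_t min_segs)
    {
            uint64_t bytes = pacing_rate >> pacing_shift;
            uint64_t cap = 65536 - 1 - 128;  /* stand-in for GSO_MAX_SIZE - 1 - MAX_TCP_HEADER */
            uint32_t segs;

            if (bytes > cap)
                    bytes = cap;
            segs = bytes / mss;
            if (segs < min_segs)
                    segs = min_segs;
            return segs < 0x7FU ? segs : 0x7FU;  /* fits the old 7-bit field */
    }

    int main(void)
    {
            /* 100 Mbit/s ~= 12.5 MB/s, shift 10, MSS 1448:
             * 12500000 >> 10 = 12207 bytes -> 12207 / 1448 = 8 segments. */
            printf("%u\n", tso_segs_goal(12500000, 10, 1448, 2));
            return 0;
    }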

@@ -346,17 +344,17 @@
 		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd*/

 	w = (u64)bw * bbr->min_rtt_us;

 	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
 	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;

 	/* Allow enough full-sized skbs in flight to utilize end systems. */
-	cwnd += 3 * bbr->tso_segs_goal;
+	cwnd += 3 * bbr_tso_segs_goal(sk);

 	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
 	cwnd = (cwnd + 1) & ~1U;

 	return cwnd;
 }

 /* An optimization in BBR to reduce losses: On the first round of recovery, we

--- 459 unchanged lines hidden ---
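[Note] The hunk above sits inside bbr_target_cwnd(), which is a BDP computation: w = bw * min_rtt_us is the estimated bandwidth-delay product in fixed point (the struct comment gives bw as pkts/uS << 24, i.e. BW_SCALE = 24), the gain is applied and the result rounded up, three skb-goals of headroom cover TSO/GSO batching, and rounding cwnd up to an even value pairs packets for delayed ACKs. A worked sketch with assumed scales (BBR_SCALE = 8 is an assumption here):

    /* Worked example of the cwnd math above; scale constants are assumed. */
    #include <stdint.h>
    #include <stdio.h>

    #define BBR_SCALE 8                /* assumed gain fixed-point shift */
    #define BW_SCALE  24               /* bw is pkts/uS << 24 (struct comment) */
    #define BW_UNIT   (1ULL << BW_SCALE)

    static uint32_t target_cwnd(uint32_t bw, uint32_t min_rtt_us,
                                uint32_t gain, uint32_t tso_segs_goal)
    {
            uint64_t w = (uint64_t)bw * min_rtt_us;  /* scaled BDP */
            uint32_t cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;

            cwnd += 3 * tso_segs_goal;  /* headroom for TSO/GSO batching */
            return (cwnd + 1) & ~1U;    /* round up to an even cwnd */
    }

    int main(void)
    {
            /* 10 pkts/ms = 0.01 pkt/us -> bw ~= 0.01 * 2^24 = 167772;
             * min_rtt = 10 ms, gain = 2.0 (512), goal = 8 segments:
             * BDP ~= 100 pkts, 2x gain -> 200, +24, rounded even -> 224. */
            printf("%u\n", target_cwnd(167772, 10000, 2 << BBR_SCALE, 8));
            return 0;
    }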

@@ -822,27 +820,25 @@
 {
 	struct bbr *bbr = inet_csk_ca(sk);
 	u32 bw;

 	bbr_update_model(sk, rs);

 	bw = bbr_bw(sk);
 	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-	bbr_set_tso_segs_goal(sk);
 	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }

 static void bbr_init(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);

 	bbr->prior_cwnd = 0;
-	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
 	bbr->rtt_cnt = 0;
 	bbr->next_rtt_delivered = 0;
 	bbr->prev_ca_state = TCP_CA_Open;
 	bbr->packet_conservation = 0;

 	bbr->probe_rtt_done_stamp = 0;
 	bbr->probe_rtt_round_done = 0;
 	bbr->min_rtt_us = tcp_min_rtt(tp);

--- 118 unchanged lines hidden ---