1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/crypto.h> 3 #include <linux/err.h> 4 #include <linux/init.h> 5 #include <linux/kernel.h> 6 #include <linux/list.h> 7 #include <linux/tcp.h> 8 #include <linux/rcupdate.h> 9 #include <linux/rculist.h> 10 #include <net/inetpeer.h> 11 #include <net/tcp.h> 12 13 void tcp_fastopen_init_key_once(struct net *net) 14 { 15 u8 key[TCP_FASTOPEN_KEY_LENGTH]; 16 struct tcp_fastopen_context *ctxt; 17 18 rcu_read_lock(); 19 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); 20 if (ctxt) { 21 rcu_read_unlock(); 22 return; 23 } 24 rcu_read_unlock(); 25 26 /* tcp_fastopen_reset_cipher publishes the new context 27 * atomically, so we allow this race happening here. 28 * 29 * All call sites of tcp_fastopen_cookie_gen also check 30 * for a valid cookie, so this is an acceptable risk. 31 */ 32 get_random_bytes(key, sizeof(key)); 33 tcp_fastopen_reset_cipher(net, NULL, key, NULL); 34 } 35 36 static void tcp_fastopen_ctx_free(struct rcu_head *head) 37 { 38 struct tcp_fastopen_context *ctx = 39 container_of(head, struct tcp_fastopen_context, rcu); 40 41 kzfree(ctx); 42 } 43 44 void tcp_fastopen_destroy_cipher(struct sock *sk) 45 { 46 struct tcp_fastopen_context *ctx; 47 48 ctx = rcu_dereference_protected( 49 inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1); 50 if (ctx) 51 call_rcu(&ctx->rcu, tcp_fastopen_ctx_free); 52 } 53 54 void tcp_fastopen_ctx_destroy(struct net *net) 55 { 56 struct tcp_fastopen_context *ctxt; 57 58 spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); 59 60 ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, 61 lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); 62 rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); 63 spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); 64 65 if (ctxt) 66 call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); 67 } 68 69 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, 70 void *primary_key, void *backup_key) 71 { 72 struct tcp_fastopen_context *ctx, *octx; 73 struct fastopen_queue *q; 74 int err = 0; 75 76 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 77 if (!ctx) { 78 err = -ENOMEM; 79 goto out; 80 } 81 82 ctx->key[0].key[0] = get_unaligned_le64(primary_key); 83 ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8); 84 if (backup_key) { 85 ctx->key[1].key[0] = get_unaligned_le64(backup_key); 86 ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8); 87 ctx->num = 2; 88 } else { 89 ctx->num = 1; 90 } 91 92 spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); 93 if (sk) { 94 q = &inet_csk(sk)->icsk_accept_queue.fastopenq; 95 octx = rcu_dereference_protected(q->ctx, 96 lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); 97 rcu_assign_pointer(q->ctx, ctx); 98 } else { 99 octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, 100 lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); 101 rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); 102 } 103 spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); 104 105 if (octx) 106 call_rcu(&octx->rcu, tcp_fastopen_ctx_free); 107 out: 108 return err; 109 } 110 111 static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, 112 struct sk_buff *syn, 113 const siphash_key_t *key, 114 struct tcp_fastopen_cookie *foc) 115 { 116 BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); 117 118 if (req->rsk_ops->family == AF_INET) { 119 const struct iphdr *iph = ip_hdr(syn); 120 121 foc->val[0] = cpu_to_le64(siphash(&iph->saddr, 122 sizeof(iph->saddr) + 123 sizeof(iph->daddr), 124 key)); 125 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 126 return true; 127 } 128 #if IS_ENABLED(CONFIG_IPV6) 129 if (req->rsk_ops->family == AF_INET6) { 130 const struct ipv6hdr *ip6h = ipv6_hdr(syn); 131 132 foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr, 133 sizeof(ip6h->saddr) + 134 sizeof(ip6h->daddr), 135 key)); 136 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 137 return true; 138 } 139 #endif 140 return false; 141 } 142 143 /* Generate the fastopen cookie by applying SipHash to both the source and 144 * destination addresses. 145 */ 146 static void tcp_fastopen_cookie_gen(struct sock *sk, 147 struct request_sock *req, 148 struct sk_buff *syn, 149 struct tcp_fastopen_cookie *foc) 150 { 151 struct tcp_fastopen_context *ctx; 152 153 rcu_read_lock(); 154 ctx = tcp_fastopen_get_ctx(sk); 155 if (ctx) 156 __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc); 157 rcu_read_unlock(); 158 } 159 160 /* If an incoming SYN or SYNACK frame contains a payload and/or FIN, 161 * queue this additional data / FIN. 162 */ 163 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) 164 { 165 struct tcp_sock *tp = tcp_sk(sk); 166 167 if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) 168 return; 169 170 skb = skb_clone(skb, GFP_ATOMIC); 171 if (!skb) 172 return; 173 174 skb_dst_drop(skb); 175 /* segs_in has been initialized to 1 in tcp_create_openreq_child(). 176 * Hence, reset segs_in to 0 before calling tcp_segs_in() 177 * to avoid double counting. Also, tcp_segs_in() expects 178 * skb->len to include the tcp_hdrlen. Hence, it should 179 * be called before __skb_pull(). 180 */ 181 tp->segs_in = 0; 182 tcp_segs_in(tp, skb); 183 __skb_pull(skb, tcp_hdrlen(skb)); 184 sk_forced_mem_schedule(sk, skb->truesize); 185 skb_set_owner_r(skb, sk); 186 187 TCP_SKB_CB(skb)->seq++; 188 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; 189 190 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 191 __skb_queue_tail(&sk->sk_receive_queue, skb); 192 tp->syn_data_acked = 1; 193 194 /* u64_stats_update_begin(&tp->syncp) not needed here, 195 * as we certainly are not changing upper 32bit value (0) 196 */ 197 tp->bytes_received = skb->len; 198 199 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 200 tcp_fin(sk); 201 } 202 203 /* returns 0 - no key match, 1 for primary, 2 for backup */ 204 static int tcp_fastopen_cookie_gen_check(struct sock *sk, 205 struct request_sock *req, 206 struct sk_buff *syn, 207 struct tcp_fastopen_cookie *orig, 208 struct tcp_fastopen_cookie *valid_foc) 209 { 210 struct tcp_fastopen_cookie search_foc = { .len = -1 }; 211 struct tcp_fastopen_cookie *foc = valid_foc; 212 struct tcp_fastopen_context *ctx; 213 int i, ret = 0; 214 215 rcu_read_lock(); 216 ctx = tcp_fastopen_get_ctx(sk); 217 if (!ctx) 218 goto out; 219 for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { 220 __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc); 221 if (tcp_fastopen_cookie_match(foc, orig)) { 222 ret = i + 1; 223 goto out; 224 } 225 foc = &search_foc; 226 } 227 out: 228 rcu_read_unlock(); 229 return ret; 230 } 231 232 static struct sock *tcp_fastopen_create_child(struct sock *sk, 233 struct sk_buff *skb, 234 struct request_sock *req) 235 { 236 struct tcp_sock *tp; 237 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 238 struct sock *child; 239 bool own_req; 240 241 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, 242 NULL, &own_req); 243 if (!child) 244 return NULL; 245 246 spin_lock(&queue->fastopenq.lock); 247 queue->fastopenq.qlen++; 248 spin_unlock(&queue->fastopenq.lock); 249 250 /* Initialize the child socket. Have to fix some values to take 251 * into account the child is a Fast Open socket and is created 252 * only out of the bits carried in the SYN packet. 253 */ 254 tp = tcp_sk(child); 255 256 rcu_assign_pointer(tp->fastopen_rsk, req); 257 tcp_rsk(req)->tfo_listener = true; 258 259 /* RFC1323: The window in SYN & SYN/ACK segments is never 260 * scaled. So correct it appropriately. 261 */ 262 tp->snd_wnd = ntohs(tcp_hdr(skb)->window); 263 tp->max_window = tp->snd_wnd; 264 265 /* Activate the retrans timer so that SYNACK can be retransmitted. 266 * The request socket is not added to the ehash 267 * because it's been added to the accept queue directly. 268 */ 269 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, 270 TCP_TIMEOUT_INIT, TCP_RTO_MAX); 271 272 refcount_set(&req->rsk_refcnt, 2); 273 274 /* Now finish processing the fastopen child socket. */ 275 tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); 276 277 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 278 279 tcp_fastopen_add_skb(child, skb); 280 281 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; 282 tp->rcv_wup = tp->rcv_nxt; 283 /* tcp_conn_request() is sending the SYNACK, 284 * and queues the child into listener accept queue. 285 */ 286 return child; 287 } 288 289 static bool tcp_fastopen_queue_check(struct sock *sk) 290 { 291 struct fastopen_queue *fastopenq; 292 293 /* Make sure the listener has enabled fastopen, and we don't 294 * exceed the max # of pending TFO requests allowed before trying 295 * to validating the cookie in order to avoid burning CPU cycles 296 * unnecessarily. 297 * 298 * XXX (TFO) - The implication of checking the max_qlen before 299 * processing a cookie request is that clients can't differentiate 300 * between qlen overflow causing Fast Open to be disabled 301 * temporarily vs a server not supporting Fast Open at all. 302 */ 303 fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; 304 if (fastopenq->max_qlen == 0) 305 return false; 306 307 if (fastopenq->qlen >= fastopenq->max_qlen) { 308 struct request_sock *req1; 309 spin_lock(&fastopenq->lock); 310 req1 = fastopenq->rskq_rst_head; 311 if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { 312 __NET_INC_STATS(sock_net(sk), 313 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); 314 spin_unlock(&fastopenq->lock); 315 return false; 316 } 317 fastopenq->rskq_rst_head = req1->dl_next; 318 fastopenq->qlen--; 319 spin_unlock(&fastopenq->lock); 320 reqsk_put(req1); 321 } 322 return true; 323 } 324 325 static bool tcp_fastopen_no_cookie(const struct sock *sk, 326 const struct dst_entry *dst, 327 int flag) 328 { 329 return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || 330 tcp_sk(sk)->fastopen_no_cookie || 331 (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); 332 } 333 334 /* Returns true if we should perform Fast Open on the SYN. The cookie (foc) 335 * may be updated and return the client in the SYN-ACK later. E.g., Fast Open 336 * cookie request (foc->len == 0). 337 */ 338 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, 339 struct request_sock *req, 340 struct tcp_fastopen_cookie *foc, 341 const struct dst_entry *dst) 342 { 343 bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; 344 int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; 345 struct tcp_fastopen_cookie valid_foc = { .len = -1 }; 346 struct sock *child; 347 int ret = 0; 348 349 if (foc->len == 0) /* Client requests a cookie */ 350 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); 351 352 if (!((tcp_fastopen & TFO_SERVER_ENABLE) && 353 (syn_data || foc->len >= 0) && 354 tcp_fastopen_queue_check(sk))) { 355 foc->len = -1; 356 return NULL; 357 } 358 359 if (syn_data && 360 tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) 361 goto fastopen; 362 363 if (foc->len == 0) { 364 /* Client requests a cookie. */ 365 tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc); 366 } else if (foc->len > 0) { 367 ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc, 368 &valid_foc); 369 if (!ret) { 370 NET_INC_STATS(sock_net(sk), 371 LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 372 } else { 373 /* Cookie is valid. Create a (full) child socket to 374 * accept the data in SYN before returning a SYN-ACK to 375 * ack the data. If we fail to create the socket, fall 376 * back and ack the ISN only but includes the same 377 * cookie. 378 * 379 * Note: Data-less SYN with valid cookie is allowed to 380 * send data in SYN_RECV state. 381 */ 382 fastopen: 383 child = tcp_fastopen_create_child(sk, skb, req); 384 if (child) { 385 if (ret == 2) { 386 valid_foc.exp = foc->exp; 387 *foc = valid_foc; 388 NET_INC_STATS(sock_net(sk), 389 LINUX_MIB_TCPFASTOPENPASSIVEALTKEY); 390 } else { 391 foc->len = -1; 392 } 393 NET_INC_STATS(sock_net(sk), 394 LINUX_MIB_TCPFASTOPENPASSIVE); 395 return child; 396 } 397 NET_INC_STATS(sock_net(sk), 398 LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 399 } 400 } 401 valid_foc.exp = foc->exp; 402 *foc = valid_foc; 403 return NULL; 404 } 405 406 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, 407 struct tcp_fastopen_cookie *cookie) 408 { 409 const struct dst_entry *dst; 410 411 tcp_fastopen_cache_get(sk, mss, cookie); 412 413 /* Firewall blackhole issue check */ 414 if (tcp_fastopen_active_should_disable(sk)) { 415 cookie->len = -1; 416 return false; 417 } 418 419 dst = __sk_dst_get(sk); 420 421 if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) { 422 cookie->len = -1; 423 return true; 424 } 425 return cookie->len > 0; 426 } 427 428 /* This function checks if we want to defer sending SYN until the first 429 * write(). We defer under the following conditions: 430 * 1. fastopen_connect sockopt is set 431 * 2. we have a valid cookie 432 * Return value: return true if we want to defer until application writes data 433 * return false if we want to send out SYN immediately 434 */ 435 bool tcp_fastopen_defer_connect(struct sock *sk, int *err) 436 { 437 struct tcp_fastopen_cookie cookie = { .len = 0 }; 438 struct tcp_sock *tp = tcp_sk(sk); 439 u16 mss; 440 441 if (tp->fastopen_connect && !tp->fastopen_req) { 442 if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { 443 inet_sk(sk)->defer_connect = 1; 444 return true; 445 } 446 447 /* Alloc fastopen_req in order for FO option to be included 448 * in SYN 449 */ 450 tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), 451 sk->sk_allocation); 452 if (tp->fastopen_req) 453 tp->fastopen_req->cookie = cookie; 454 else 455 *err = -ENOBUFS; 456 } 457 return false; 458 } 459 EXPORT_SYMBOL(tcp_fastopen_defer_connect); 460 461 /* 462 * The following code block is to deal with middle box issues with TFO: 463 * Middlebox firewall issues can potentially cause server's data being 464 * blackholed after a successful 3WHS using TFO. 465 * The proposed solution is to disable active TFO globally under the 466 * following circumstances: 467 * 1. client side TFO socket receives out of order FIN 468 * 2. client side TFO socket receives out of order RST 469 * 3. client side TFO socket has timed out three times consecutively during 470 * or after handshake 471 * We disable active side TFO globally for 1hr at first. Then if it 472 * happens again, we disable it for 2h, then 4h, 8h, ... 473 * And we reset the timeout back to 1hr when we see a successful active 474 * TFO connection with data exchanges. 475 */ 476 477 /* Disable active TFO and record current jiffies and 478 * tfo_active_disable_times 479 */ 480 void tcp_fastopen_active_disable(struct sock *sk) 481 { 482 struct net *net = sock_net(sk); 483 484 atomic_inc(&net->ipv4.tfo_active_disable_times); 485 net->ipv4.tfo_active_disable_stamp = jiffies; 486 NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); 487 } 488 489 /* Calculate timeout for tfo active disable 490 * Return true if we are still in the active TFO disable period 491 * Return false if timeout already expired and we should use active TFO 492 */ 493 bool tcp_fastopen_active_should_disable(struct sock *sk) 494 { 495 unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; 496 int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); 497 unsigned long timeout; 498 int multiplier; 499 500 if (!tfo_da_times) 501 return false; 502 503 /* Limit timout to max: 2^6 * initial timeout */ 504 multiplier = 1 << min(tfo_da_times - 1, 6); 505 timeout = multiplier * tfo_bh_timeout * HZ; 506 if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) 507 return true; 508 509 /* Mark check bit so we can check for successful active TFO 510 * condition and reset tfo_active_disable_times 511 */ 512 tcp_sk(sk)->syn_fastopen_ch = 1; 513 return false; 514 } 515 516 /* Disable active TFO if FIN is the only packet in the ofo queue 517 * and no data is received. 518 * Also check if we can reset tfo_active_disable_times if data is 519 * received successfully on a marked active TFO sockets opened on 520 * a non-loopback interface 521 */ 522 void tcp_fastopen_active_disable_ofo_check(struct sock *sk) 523 { 524 struct tcp_sock *tp = tcp_sk(sk); 525 struct dst_entry *dst; 526 struct sk_buff *skb; 527 528 if (!tp->syn_fastopen) 529 return; 530 531 if (!tp->data_segs_in) { 532 skb = skb_rb_first(&tp->out_of_order_queue); 533 if (skb && !skb_rb_next(skb)) { 534 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { 535 tcp_fastopen_active_disable(sk); 536 return; 537 } 538 } 539 } else if (tp->syn_fastopen_ch && 540 atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { 541 dst = sk_dst_get(sk); 542 if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) 543 atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); 544 dst_release(dst); 545 } 546 } 547 548 void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired) 549 { 550 u32 timeouts = inet_csk(sk)->icsk_retransmits; 551 struct tcp_sock *tp = tcp_sk(sk); 552 553 /* Broken middle-boxes may black-hole Fast Open connection during or 554 * even after the handshake. Be extremely conservative and pause 555 * Fast Open globally after hitting the third consecutive timeout or 556 * exceeding the configured timeout limit. 557 */ 558 if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) && 559 (timeouts == 2 || (timeouts < 2 && expired))) { 560 tcp_fastopen_active_disable(sk); 561 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); 562 } 563 } 564