// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
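 *
 * (Illustrative note, not in the original file: this relies on ipv6_pinfo
 * being placed right at the end of struct tcp6_sock, i.e.
 *	offsetof(struct tcp6_sock, inet6) ==
 *		sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo),
 * which is exactly what the offset computation below assumes.)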
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bounds specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
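
/* Illustrative (not part of the original file): from user space, a connect()
 * to a link-local peer must carry a scope id, matching the check above.
 * A minimal sketch, assuming interface "eth0" and an arbitrary peer address:
 *
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "fe80::1", &a.sin6_addr);
 *	a.sin6_scope_id = if_nametoindex("eth0");
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * Without sin6_scope_id (and without an otherwise bound device),
 * tcp_v6_connect() returns -EINVAL for link-local destinations.
 */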

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
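
/* Illustrative only (not part of the original file): the parser above is
 * reached from user space via setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT).
 * A minimal sketch, assuming a peer address of 2001:db8::1:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key; TCP_MD5SIG_EXT additionally honours
 * tcpm_prefixlen and tcpm_ifindex as handled above.
 */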

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb,
				    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash, l3index;
	u8 newhash[16];

	/* sdif set, means packet ingressed via a device
	 * in an L3 domain and dif is set to the l3mdev
	 */
	l3index = sdif ? dif : 0;

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash?
	 */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest), l3index);
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for RST.
	 * The underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * We are not losing security here:
		 * the incoming packet is checked against the md5 hash of the
		 * key we find; no RST is generated if the md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt	  = NULL;
		newnp->mcast_oif  = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

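/* Illustrative note (not part of the original file): the options latched
 * onto np->pktoptions below are the ones a user space application asked
 * for with the IPV6_RECV* socket options, e.g.:
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
 *
 * For TCP they are not delivered per segment; only the most recent set is
 * kept here and read back via getsockopt(IPV6_PKTOPTIONS) (Stevens' API).
 */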
ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	sk_defer_free_flush(sk);
	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
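
/* Illustrative note (not part of the original file): ipv6_mapped above is
 * installed when an AF_INET6 TCP socket actually talks IPv4, i.e. when it
 * connects to (or accepts from) a v4-mapped address and IPV6_V6ONLY is off.
 * A minimal user space sketch, assuming peer 192.0.2.1 port 80:
 *
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * tcp_v6_connect() then switches icsk_af_ops to &ipv6_mapped and defers to
 * tcp_v4_connect(), so all further traffic goes through the IPv4 paths.
 */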

/* NOTE: A lot of things are set to zero explicitly by the call to
 *	 sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest  = &tw->tw_v6_daddr;
	src   = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

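/* Protocol operations for SOCK_STREAM/IPPROTO_TCP sockets in the AF_INET6
 * family.  Most handlers are shared with IPv4 TCP; only the connect,
 * init/destroy, backlog receive and hash entry points differ from their
 * IPv4 counterparts.
 */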
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

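/* Shutdown path: unregister the pernet ops, the protosw entry and the
 * inet6 protocol handler, in the reverse order of their registration
 * in tcpv6_init().
 */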
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}