// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
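 * It relies on the tcp6_sock layout keeping ipv6_pinfo as its last member,
 * so the offset is simply sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo),
 * exactly as computed below.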
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
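			 * For example, a socket already bound to one device
			 * cannot connect() to a link-local peer scoped to a
			 * different interface; sk_dev_equal_l3scope() below
			 * enforces this.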
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
		struct in6_addr prev_v6_rcv_saddr;

		if (icsk->icsk_bind2_hash) {
			prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
								     sk, sock_net(sk),
								     inet->inet_num);
			prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
		}
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;

		if (prev_addr_hashbucket) {
			err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
			if (err) {
				sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
				goto failure;
			}
		}
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if
		    (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ?
		  tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ?
			       : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1.
	   TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock
					  *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive.
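	 * The reply (ACK or RST) is built from the incoming segment: its ports
	 * point back at the sender, and the caller-supplied seq/ack/window
	 * values are filled in below.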
	 */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when this is for an RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred */
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ?
	      sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We are not losing security here:
		 * the incoming packet is checked against the md5 hash of the
		 * key that is found, and no RST is generated if the md5 hash
		 * doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ?
		  tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 socket
		 * worked with the IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
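	 *
	 * Next the child's option state is rebuilt below: the rx option flags
	 * are copied from the listener, and any IPv6 tx options are duplicated
	 * onto newsk and accounted for in icsk_ext_hdr_len.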
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we do
	 * one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case.
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	 * goes to IPv4 receive handler and backlogged.
	 * From backlog it always goes here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but it is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code where we
	 * may make it without affecting IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, all the idea behind IPV6_PKTOPTIONS
	 * looks not very well thought out. For now we latch
	 * options, received in the last packet, enqueued
	 * by tcp. Feel free to propose a better solution.
	 *					--ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to tail of read queue, rather than out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which user wants to receive.
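	 *
	 * When all four conditions hold, the cloned skb is stashed in
	 * np->pktoptions below, from where the IPV6_PKTOPTIONS machinery can
	 * later hand the latched options to user space.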
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		drop_reason = tcp_inbound_md5_hash(sk, skb,
						   &hdr->saddr, &hdr->daddr,
						   AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
					   AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
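		/* The RST answers a stray segment in TIME_WAIT; tear the
		 * timewait socket down immediately below instead of waiting
		 * for its timer to expire.
		 */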
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *	 sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0,  /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
2042 */ 2043 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2044 READ_ONCE(tp->copied_seq), 0); 2045 2046 seq_printf(seq, 2047 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2048 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 2049 i, 2050 src->s6_addr32[0], src->s6_addr32[1], 2051 src->s6_addr32[2], src->s6_addr32[3], srcp, 2052 dest->s6_addr32[0], dest->s6_addr32[1], 2053 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2054 state, 2055 READ_ONCE(tp->write_seq) - tp->snd_una, 2056 rx_queue, 2057 timer_active, 2058 jiffies_delta_to_clock_t(timer_expires - jiffies), 2059 icsk->icsk_retransmits, 2060 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 2061 icsk->icsk_probes_out, 2062 sock_i_ino(sp), 2063 refcount_read(&sp->sk_refcnt), sp, 2064 jiffies_to_clock_t(icsk->icsk_rto), 2065 jiffies_to_clock_t(icsk->icsk_ack.ato), 2066 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2067 tcp_snd_cwnd(tp), 2068 state == TCP_LISTEN ? 2069 fastopenq->max_qlen : 2070 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 2071 ); 2072 } 2073 2074 static void get_timewait6_sock(struct seq_file *seq, 2075 struct inet_timewait_sock *tw, int i) 2076 { 2077 long delta = tw->tw_timer.expires - jiffies; 2078 const struct in6_addr *dest, *src; 2079 __u16 destp, srcp; 2080 2081 dest = &tw->tw_v6_daddr; 2082 src = &tw->tw_v6_rcv_saddr; 2083 destp = ntohs(tw->tw_dport); 2084 srcp = ntohs(tw->tw_sport); 2085 2086 seq_printf(seq, 2087 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2088 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2089 i, 2090 src->s6_addr32[0], src->s6_addr32[1], 2091 src->s6_addr32[2], src->s6_addr32[3], srcp, 2092 dest->s6_addr32[0], dest->s6_addr32[1], 2093 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2094 tw->tw_substate, 0, 0, 2095 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2096 refcount_read(&tw->tw_refcnt), tw); 2097 } 2098 2099 static int tcp6_seq_show(struct seq_file *seq, void *v) 2100 { 2101 struct tcp_iter_state *st; 2102 struct sock *sk = v; 2103 2104 if (v == SEQ_START_TOKEN) { 2105 seq_puts(seq, 2106 " sl " 2107 "local_address " 2108 "remote_address " 2109 "st tx_queue rx_queue tr tm->when retrnsmt" 2110 " uid timeout inode\n"); 2111 goto out; 2112 } 2113 st = seq->private; 2114 2115 if (sk->sk_state == TCP_TIME_WAIT) 2116 get_timewait6_sock(seq, v, st->num); 2117 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2118 get_openreq6(seq, v, st->num); 2119 else 2120 get_tcp6_sock(seq, v, st->num); 2121 out: 2122 return 0; 2123 } 2124 2125 static const struct seq_operations tcp6_seq_ops = { 2126 .show = tcp6_seq_show, 2127 .start = tcp_seq_start, 2128 .next = tcp_seq_next, 2129 .stop = tcp_seq_stop, 2130 }; 2131 2132 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2133 .family = AF_INET6, 2134 }; 2135 2136 int __net_init tcp6_proc_init(struct net *net) 2137 { 2138 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2139 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2140 return -ENOMEM; 2141 return 0; 2142 } 2143 2144 void tcp6_proc_exit(struct net *net) 2145 { 2146 remove_proc_entry("tcp6", net->proc_net); 2147 } 2148 #endif 2149 2150 struct proto tcpv6_prot = { 2151 .name = "TCPv6", 2152 .owner = THIS_MODULE, 2153 .close = tcp_close, 2154 .pre_connect = tcp_v6_pre_connect, 2155 .connect = tcp_v6_connect, 2156 .disconnect = tcp_disconnect, 2157 .accept = inet_csk_accept, 2158 .ioctl = tcp_ioctl, 2159 .init = tcp_v6_init_sock, 2160 .destroy = tcp_v6_destroy_sock, 2161 .shutdown = tcp_shutdown, 2162 .setsockopt = 
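/* The seq_file helpers above emit one line per socket: slot number, hex
 * local and remote address:port, state, tx_queue:rx_queue, timer kind and
 * expiry, retransmit count, uid, probe count, inode, refcount and pointer,
 * plus RTO/ATO, quick-ack/pingpong and congestion details for full sockets.
 * Illustrative only: an entry for a socket listening on [::]:22 would start
 * roughly as
 *
 *    0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A ...
 *
 * The trailing fields differ between listening, established and time-wait
 * sockets, as the three helpers show.
 */
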
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
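
/* tcpv6_init() is called from inet6_init() (net/ipv6/af_inet6.c); tcpv6_exit()
 * undoes its registrations in the same reverse order that tcpv6_init() uses
 * when unwinding after a failure.
 */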