// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
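 *
 * Note (added): the constant-offset computation below relies on struct
 * ipv6_pinfo being the last member of struct tcp6_sock (a struct tcp_sock
 * followed by a struct ipv6_pinfo), so
 * sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo) is that member's
 * offset from the start of the socket.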
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
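		 *
		 * Note (added): if the socket is owned by user context when
		 * the ICMP arrives, the new MTU is only stored in
		 * tp->mtu_info below and TCP_MTU_REDUCED_DEFERRED is set;
		 * tcp_release_cb() then invokes tcp_v6_mtu_reduced() once the
		 * socket lock is released.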
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ?
: sk->sk_mark, opt, 562 tclass, sk->sk_priority); 563 rcu_read_unlock(); 564 err = net_xmit_eval(err); 565 } 566 567 done: 568 return err; 569 } 570 571 572 static void tcp_v6_reqsk_destructor(struct request_sock *req) 573 { 574 kfree(inet_rsk(req)->ipv6_opt); 575 consume_skb(inet_rsk(req)->pktopts); 576 } 577 578 #ifdef CONFIG_TCP_MD5SIG 579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 580 const struct in6_addr *addr, 581 int l3index) 582 { 583 return tcp_md5_do_lookup(sk, l3index, 584 (union tcp_md5_addr *)addr, AF_INET6); 585 } 586 587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 588 const struct sock *addr_sk) 589 { 590 int l3index; 591 592 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 593 addr_sk->sk_bound_dev_if); 594 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 595 l3index); 596 } 597 598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 599 sockptr_t optval, int optlen) 600 { 601 struct tcp_md5sig cmd; 602 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 603 int l3index = 0; 604 u8 prefixlen; 605 u8 flags; 606 607 if (optlen < sizeof(cmd)) 608 return -EINVAL; 609 610 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 611 return -EFAULT; 612 613 if (sin6->sin6_family != AF_INET6) 614 return -EINVAL; 615 616 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 617 618 if (optname == TCP_MD5SIG_EXT && 619 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 620 prefixlen = cmd.tcpm_prefixlen; 621 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 622 prefixlen > 32)) 623 return -EINVAL; 624 } else { 625 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 626 } 627 628 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 630 struct net_device *dev; 631 632 rcu_read_lock(); 633 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 634 if (dev && netif_is_l3_master(dev)) 635 l3index = dev->ifindex; 636 rcu_read_unlock(); 637 638 /* ok to reference set/not set outside of rcu; 639 * right now device MUST be an L3 master 640 */ 641 if (!dev || !l3index) 642 return -EINVAL; 643 } 644 645 if (!cmd.tcpm_keylen) { 646 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 647 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 648 AF_INET, prefixlen, 649 l3index, flags); 650 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 651 AF_INET6, prefixlen, l3index, flags); 652 } 653 654 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 655 return -EINVAL; 656 657 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 658 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 659 AF_INET, prefixlen, l3index, flags, 660 cmd.tcpm_key, cmd.tcpm_keylen, 661 GFP_KERNEL); 662 663 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 664 AF_INET6, prefixlen, l3index, flags, 665 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 666 } 667 668 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 669 const struct in6_addr *daddr, 670 const struct in6_addr *saddr, 671 const struct tcphdr *th, int nbytes) 672 { 673 struct tcp6_pseudohdr *bp; 674 struct scatterlist sg; 675 struct tcphdr *_th; 676 677 bp = hp->scratch; 678 /* 1. 
TCP pseudo-header (RFC2460) */ 679 bp->saddr = *saddr; 680 bp->daddr = *daddr; 681 bp->protocol = cpu_to_be32(IPPROTO_TCP); 682 bp->len = cpu_to_be32(nbytes); 683 684 _th = (struct tcphdr *)(bp + 1); 685 memcpy(_th, th, sizeof(*th)); 686 _th->check = 0; 687 688 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 689 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 690 sizeof(*bp) + sizeof(*th)); 691 return crypto_ahash_update(hp->md5_req); 692 } 693 694 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 695 const struct in6_addr *daddr, struct in6_addr *saddr, 696 const struct tcphdr *th) 697 { 698 struct tcp_md5sig_pool *hp; 699 struct ahash_request *req; 700 701 hp = tcp_get_md5sig_pool(); 702 if (!hp) 703 goto clear_hash_noput; 704 req = hp->md5_req; 705 706 if (crypto_ahash_init(req)) 707 goto clear_hash; 708 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 709 goto clear_hash; 710 if (tcp_md5_hash_key(hp, key)) 711 goto clear_hash; 712 ahash_request_set_crypt(req, NULL, md5_hash, 0); 713 if (crypto_ahash_final(req)) 714 goto clear_hash; 715 716 tcp_put_md5sig_pool(); 717 return 0; 718 719 clear_hash: 720 tcp_put_md5sig_pool(); 721 clear_hash_noput: 722 memset(md5_hash, 0, 16); 723 return 1; 724 } 725 726 static int tcp_v6_md5_hash_skb(char *md5_hash, 727 const struct tcp_md5sig_key *key, 728 const struct sock *sk, 729 const struct sk_buff *skb) 730 { 731 const struct in6_addr *saddr, *daddr; 732 struct tcp_md5sig_pool *hp; 733 struct ahash_request *req; 734 const struct tcphdr *th = tcp_hdr(skb); 735 736 if (sk) { /* valid for establish/request sockets */ 737 saddr = &sk->sk_v6_rcv_saddr; 738 daddr = &sk->sk_v6_daddr; 739 } else { 740 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 741 saddr = &ip6h->saddr; 742 daddr = &ip6h->daddr; 743 } 744 745 hp = tcp_get_md5sig_pool(); 746 if (!hp) 747 goto clear_hash_noput; 748 req = hp->md5_req; 749 750 if (crypto_ahash_init(req)) 751 goto clear_hash; 752 753 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 754 goto clear_hash; 755 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 756 goto clear_hash; 757 if (tcp_md5_hash_key(hp, key)) 758 goto clear_hash; 759 ahash_request_set_crypt(req, NULL, md5_hash, 0); 760 if (crypto_ahash_final(req)) 761 goto clear_hash; 762 763 tcp_put_md5sig_pool(); 764 return 0; 765 766 clear_hash: 767 tcp_put_md5sig_pool(); 768 clear_hash_noput: 769 memset(md5_hash, 0, 16); 770 return 1; 771 } 772 773 #endif 774 775 static bool tcp_v6_inbound_md5_hash(const struct sock *sk, 776 const struct sk_buff *skb, 777 int dif, int sdif) 778 { 779 #ifdef CONFIG_TCP_MD5SIG 780 const __u8 *hash_location = NULL; 781 struct tcp_md5sig_key *hash_expected; 782 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 783 const struct tcphdr *th = tcp_hdr(skb); 784 int genhash, l3index; 785 u8 newhash[16]; 786 787 /* sdif set, means packet ingressed via a device 788 * in an L3 domain and dif is set to the l3mdev 789 */ 790 l3index = sdif ? dif : 0; 791 792 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); 793 hash_location = tcp_parse_md5sig_option(th); 794 795 /* We've parsed the options - do we have a hash? 
*/ 796 if (!hash_expected && !hash_location) 797 return false; 798 799 if (hash_expected && !hash_location) { 800 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 801 return true; 802 } 803 804 if (!hash_expected && hash_location) { 805 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 806 return true; 807 } 808 809 /* check the signature */ 810 genhash = tcp_v6_md5_hash_skb(newhash, 811 hash_expected, 812 NULL, skb); 813 814 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 815 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); 816 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", 817 genhash ? "failed" : "mismatch", 818 &ip6h->saddr, ntohs(th->source), 819 &ip6h->daddr, ntohs(th->dest), l3index); 820 return true; 821 } 822 #endif 823 return false; 824 } 825 826 static void tcp_v6_init_req(struct request_sock *req, 827 const struct sock *sk_listener, 828 struct sk_buff *skb) 829 { 830 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 831 struct inet_request_sock *ireq = inet_rsk(req); 832 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 833 834 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 835 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 836 837 /* So that link locals have meaning */ 838 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 839 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 840 ireq->ir_iif = tcp_v6_iif(skb); 841 842 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 843 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 844 np->rxopt.bits.rxinfo || 845 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 846 np->rxopt.bits.rxohlim || np->repflow)) { 847 refcount_inc(&skb->users); 848 ireq->pktopts = skb; 849 } 850 } 851 852 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 853 struct sk_buff *skb, 854 struct flowi *fl, 855 struct request_sock *req) 856 { 857 tcp_v6_init_req(req, sk, skb); 858 859 if (security_inet_conn_request(sk, skb, req)) 860 return NULL; 861 862 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 863 } 864 865 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 866 .family = AF_INET6, 867 .obj_size = sizeof(struct tcp6_request_sock), 868 .rtx_syn_ack = tcp_rtx_synack, 869 .send_ack = tcp_v6_reqsk_send_ack, 870 .destructor = tcp_v6_reqsk_destructor, 871 .send_reset = tcp_v6_send_reset, 872 .syn_ack_timeout = tcp_syn_ack_timeout, 873 }; 874 875 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 876 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 877 sizeof(struct ipv6hdr), 878 #ifdef CONFIG_TCP_MD5SIG 879 .req_md5_lookup = tcp_v6_md5_lookup, 880 .calc_md5_hash = tcp_v6_md5_hash_skb, 881 #endif 882 #ifdef CONFIG_SYN_COOKIES 883 .cookie_init_seq = cookie_v6_init_sequence, 884 #endif 885 .route_req = tcp_v6_route_req, 886 .init_seq = tcp_v6_init_seq, 887 .init_ts_off = tcp_v6_init_ts_off, 888 .send_synack = tcp_v6_send_synack, 889 }; 890 891 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 892 u32 ack, u32 win, u32 tsval, u32 tsecr, 893 int oif, struct tcp_md5sig_key *key, int rst, 894 u8 tclass, __be32 label, u32 priority) 895 { 896 const struct tcphdr *th = tcp_hdr(skb); 897 struct tcphdr *t1; 898 struct sk_buff *buff; 899 struct flowi6 fl6; 900 struct net *net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 901 struct sock *ctl_sk = net->ipv6.tcp_sk; 902 unsigned int tot_len = sizeof(struct tcphdr); 903 __be32 mrst = 0, *topt; 904 struct dst_entry *dst; 905 __u32 mark = 0; 906 907 if (tsecr) 908 tot_len += TCPOLEN_TSTAMP_ALIGNED; 909 #ifdef CONFIG_TCP_MD5SIG 910 if (key) 911 tot_len += TCPOLEN_MD5SIG_ALIGNED; 912 #endif 913 914 #ifdef CONFIG_MPTCP 915 if (rst && !key) { 916 mrst = mptcp_reset_option(skb); 917 918 if (mrst) 919 tot_len += sizeof(__be32); 920 } 921 #endif 922 923 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, 924 GFP_ATOMIC); 925 if (!buff) 926 return; 927 928 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); 929 930 t1 = skb_push(buff, tot_len); 931 skb_reset_transport_header(buff); 932 933 /* Swap the send and the receive. */ 934 memset(t1, 0, sizeof(*t1)); 935 t1->dest = th->source; 936 t1->source = th->dest; 937 t1->doff = tot_len / 4; 938 t1->seq = htonl(seq); 939 t1->ack_seq = htonl(ack); 940 t1->ack = !rst || !th->ack; 941 t1->rst = rst; 942 t1->window = htons(win); 943 944 topt = (__be32 *)(t1 + 1); 945 946 if (tsecr) { 947 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 948 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 949 *topt++ = htonl(tsval); 950 *topt++ = htonl(tsecr); 951 } 952 953 if (mrst) 954 *topt++ = mrst; 955 956 #ifdef CONFIG_TCP_MD5SIG 957 if (key) { 958 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 959 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 960 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 961 &ipv6_hdr(skb)->saddr, 962 &ipv6_hdr(skb)->daddr, t1); 963 } 964 #endif 965 966 memset(&fl6, 0, sizeof(fl6)); 967 fl6.daddr = ipv6_hdr(skb)->saddr; 968 fl6.saddr = ipv6_hdr(skb)->daddr; 969 fl6.flowlabel = label; 970 971 buff->ip_summed = CHECKSUM_PARTIAL; 972 973 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 974 975 fl6.flowi6_proto = IPPROTO_TCP; 976 if (rt6_need_strict(&fl6.daddr) && !oif) 977 fl6.flowi6_oif = tcp_v6_iif(skb); 978 else { 979 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 980 oif = skb->skb_iif; 981 982 fl6.flowi6_oif = oif; 983 } 984 985 if (sk) { 986 if (sk->sk_state == TCP_TIME_WAIT) { 987 mark = inet_twsk(sk)->tw_mark; 988 /* autoflowlabel relies on buff->hash */ 989 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 990 PKT_HASH_TYPE_L4); 991 } else { 992 mark = sk->sk_mark; 993 } 994 buff->tstamp = tcp_transmit_time(sk); 995 } 996 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 997 fl6.fl6_dport = t1->dest; 998 fl6.fl6_sport = t1->source; 999 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
						  sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even if it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We are not losing security here:
		 * the incoming packet is checked against the md5 hash of the
		 * key that is found, and no RST is generated if the hash
		 * doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ?
dif : 0; 1082 1083 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1084 if (!key) 1085 goto out; 1086 1087 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1088 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1089 goto out; 1090 } 1091 #endif 1092 1093 if (th->ack) 1094 seq = ntohl(th->ack_seq); 1095 else 1096 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1097 (th->doff << 2); 1098 1099 if (sk) { 1100 oif = sk->sk_bound_dev_if; 1101 if (sk_fullsock(sk)) { 1102 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1103 1104 trace_tcp_send_reset(sk, skb); 1105 if (np->repflow) 1106 label = ip6_flowlabel(ipv6h); 1107 priority = sk->sk_priority; 1108 } 1109 if (sk->sk_state == TCP_TIME_WAIT) { 1110 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1111 priority = inet_twsk(sk)->tw_priority; 1112 } 1113 } else { 1114 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1115 label = ip6_flowlabel(ipv6h); 1116 } 1117 1118 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1119 ipv6_get_dsfield(ipv6h), label, priority); 1120 1121 #ifdef CONFIG_TCP_MD5SIG 1122 out: 1123 rcu_read_unlock(); 1124 #endif 1125 } 1126 1127 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1128 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1129 struct tcp_md5sig_key *key, u8 tclass, 1130 __be32 label, u32 priority) 1131 { 1132 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1133 tclass, label, priority); 1134 } 1135 1136 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1137 { 1138 struct inet_timewait_sock *tw = inet_twsk(sk); 1139 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1140 1141 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1142 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1143 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1144 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1145 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1146 1147 inet_twsk_put(tw); 1148 } 1149 1150 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1151 struct request_sock *req) 1152 { 1153 int l3index; 1154 1155 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1156 1157 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1158 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1159 */ 1160 /* RFC 7323 2.3 1161 * The window field (SEG.WND) of every outgoing segment, with the 1162 * exception of <SYN> segments, MUST be right-shifted by 1163 * Rcv.Wind.Shift bits: 1164 */ 1165 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1166 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1167 tcp_rsk(req)->rcv_nxt, 1168 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1169 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1170 req->ts_recent, sk->sk_bound_dev_if, 1171 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1172 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1173 } 1174 1175 1176 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1177 { 1178 #ifdef CONFIG_SYN_COOKIES 1179 const struct tcphdr *th = tcp_hdr(skb); 1180 1181 if (!th->syn) 1182 sk = cookie_v6_check(sk, skb); 1183 #endif 1184 return sk; 1185 } 1186 1187 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1188 struct tcphdr *th, u32 *cookie) 1189 { 1190 u16 mss = 0; 1191 #ifdef CONFIG_SYN_COOKIES 1192 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1193 &tcp_request_sock_ipv6_ops, sk, th); 1194 if (mss) { 1195 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1196 tcp_synq_overflow(sk); 1197 } 1198 #endif 1199 return mss; 1200 } 1201 1202 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1203 { 1204 if (skb->protocol == htons(ETH_P_IP)) 1205 return tcp_v4_conn_request(sk, skb); 1206 1207 if (!ipv6_unicast_destination(skb)) 1208 goto drop; 1209 1210 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1211 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1212 return 0; 1213 } 1214 1215 return tcp_conn_request(&tcp6_request_sock_ops, 1216 &tcp_request_sock_ipv6_ops, sk, skb); 1217 1218 drop: 1219 tcp_listendrop(sk); 1220 return 0; /* don't send reset */ 1221 } 1222 1223 static void tcp_v6_restore_cb(struct sk_buff *skb) 1224 { 1225 /* We need to move header back to the beginning if xfrm6_policy_check() 1226 * and tcp_v6_fill_cb() are going to be called again. 1227 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place: until this moment the IPv4 tcp code
		 * has been working with the IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we
	 * do one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should be executed in the
			 * syncookie case only.
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
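 *
 * (Added note: tcp_v6_do_rcv() below handles the TCP_ESTABLISHED fast path,
 * the TCP_LISTEN path including the syncookie check, and falls back to
 * tcp_rcv_state_process() for the remaining states.)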
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: the socket is IPv6, but an IPv4 packet arrives,
	 * goes to the IPv4 receive handler and gets backlogged.
	 * From the backlog it always ends up here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but it is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset().  --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code where we
	 * may make it not affect IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, the whole idea behind IPV6_PKTOPTIONS
	 * does not look very well thought out. For now we latch
	 * options, received in the last packet, enqueued
	 * by tcp. Feel free to propose a better solution.
	 *					--ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to tail of read queue, rather than out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which user wants to receive.
1574 */ 1575 tp = tcp_sk(sk); 1576 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1577 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1578 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1579 np->mcast_oif = tcp_v6_iif(opt_skb); 1580 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1581 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1582 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1583 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1584 if (np->repflow) 1585 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1586 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1587 skb_set_owner_r(opt_skb, sk); 1588 tcp_v6_restore_cb(opt_skb); 1589 opt_skb = xchg(&np->pktoptions, opt_skb); 1590 } else { 1591 __kfree_skb(opt_skb); 1592 opt_skb = xchg(&np->pktoptions, NULL); 1593 } 1594 } 1595 1596 consume_skb(opt_skb); 1597 return 0; 1598 } 1599 1600 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1601 const struct tcphdr *th) 1602 { 1603 /* This is tricky: we move IP6CB at its correct location into 1604 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1605 * _decode_session6() uses IP6CB(). 1606 * barrier() makes sure compiler won't play aliasing games. 1607 */ 1608 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1609 sizeof(struct inet6_skb_parm)); 1610 barrier(); 1611 1612 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1613 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1614 skb->len - th->doff*4); 1615 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1616 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1617 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1618 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1619 TCP_SKB_CB(skb)->sacked = 0; 1620 TCP_SKB_CB(skb)->has_rxtstamp = 1621 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1622 } 1623 1624 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1625 { 1626 int sdif = inet6_sdif(skb); 1627 int dif = inet6_iif(skb); 1628 const struct tcphdr *th; 1629 const struct ipv6hdr *hdr; 1630 bool refcounted; 1631 struct sock *sk; 1632 int ret; 1633 struct net *net = dev_net(skb->dev); 1634 1635 if (skb->pkt_type != PACKET_HOST) 1636 goto discard_it; 1637 1638 /* 1639 * Count it even if it's bad. 
1640 */ 1641 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1642 1643 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1644 goto discard_it; 1645 1646 th = (const struct tcphdr *)skb->data; 1647 1648 if (unlikely(th->doff < sizeof(struct tcphdr)/4)) 1649 goto bad_packet; 1650 if (!pskb_may_pull(skb, th->doff*4)) 1651 goto discard_it; 1652 1653 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1654 goto csum_error; 1655 1656 th = (const struct tcphdr *)skb->data; 1657 hdr = ipv6_hdr(skb); 1658 1659 lookup: 1660 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1661 th->source, th->dest, inet6_iif(skb), sdif, 1662 &refcounted); 1663 if (!sk) 1664 goto no_tcp_socket; 1665 1666 process: 1667 if (sk->sk_state == TCP_TIME_WAIT) 1668 goto do_time_wait; 1669 1670 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1671 struct request_sock *req = inet_reqsk(sk); 1672 bool req_stolen = false; 1673 struct sock *nsk; 1674 1675 sk = req->rsk_listener; 1676 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { 1677 sk_drops_add(sk, skb); 1678 reqsk_put(req); 1679 goto discard_it; 1680 } 1681 if (tcp_checksum_complete(skb)) { 1682 reqsk_put(req); 1683 goto csum_error; 1684 } 1685 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1686 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1687 if (!nsk) { 1688 inet_csk_reqsk_queue_drop_and_put(sk, req); 1689 goto lookup; 1690 } 1691 sk = nsk; 1692 /* reuseport_migrate_sock() has already held one sk_refcnt 1693 * before returning. 1694 */ 1695 } else { 1696 sock_hold(sk); 1697 } 1698 refcounted = true; 1699 nsk = NULL; 1700 if (!tcp_filter(sk, skb)) { 1701 th = (const struct tcphdr *)skb->data; 1702 hdr = ipv6_hdr(skb); 1703 tcp_v6_fill_cb(skb, hdr, th); 1704 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1705 } 1706 if (!nsk) { 1707 reqsk_put(req); 1708 if (req_stolen) { 1709 /* Another cpu got exclusive access to req 1710 * and created a full blown socket. 1711 * Try to feed this packet to this socket 1712 * instead of discarding it. 1713 */ 1714 tcp_v6_restore_cb(skb); 1715 sock_put(sk); 1716 goto lookup; 1717 } 1718 goto discard_and_relse; 1719 } 1720 if (nsk == sk) { 1721 reqsk_put(req); 1722 tcp_v6_restore_cb(skb); 1723 } else if (tcp_child_process(sk, nsk, skb)) { 1724 tcp_v6_send_reset(nsk, skb); 1725 goto discard_and_relse; 1726 } else { 1727 sock_put(sk); 1728 return 0; 1729 } 1730 } 1731 1732 if (static_branch_unlikely(&ip6_min_hopcount)) { 1733 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1734 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1735 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1736 goto discard_and_relse; 1737 } 1738 } 1739 1740 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1741 goto discard_and_relse; 1742 1743 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) 1744 goto discard_and_relse; 1745 1746 if (tcp_filter(sk, skb)) 1747 goto discard_and_relse; 1748 th = (const struct tcphdr *)skb->data; 1749 hdr = ipv6_hdr(skb); 1750 tcp_v6_fill_cb(skb, hdr, th); 1751 1752 skb->dev = NULL; 1753 1754 if (sk->sk_state == TCP_LISTEN) { 1755 ret = tcp_v6_do_rcv(sk, skb); 1756 goto put_and_return; 1757 } 1758 1759 sk_incoming_cpu_update(sk); 1760 1761 bh_lock_sock_nested(sk); 1762 tcp_segs_in(tcp_sk(sk), skb); 1763 ret = 0; 1764 if (!sock_owned_by_user(sk)) { 1765 ret = tcp_v6_do_rcv(sk, skb); 1766 } else { 1767 if (tcp_add_backlog(sk, skb)) 1768 goto discard_and_relse; 1769 } 1770 bh_unlock_sock(sk); 1771 put_and_return: 1772 if (refcounted) 1773 sock_put(sk); 1774 return ret ? 
-1 : 0; 1775 1776 no_tcp_socket: 1777 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1778 goto discard_it; 1779 1780 tcp_v6_fill_cb(skb, hdr, th); 1781 1782 if (tcp_checksum_complete(skb)) { 1783 csum_error: 1784 trace_tcp_bad_csum(skb); 1785 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1786 bad_packet: 1787 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1788 } else { 1789 tcp_v6_send_reset(NULL, skb); 1790 } 1791 1792 discard_it: 1793 kfree_skb(skb); 1794 return 0; 1795 1796 discard_and_relse: 1797 sk_drops_add(sk, skb); 1798 if (refcounted) 1799 sock_put(sk); 1800 goto discard_it; 1801 1802 do_time_wait: 1803 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1804 inet_twsk_put(inet_twsk(sk)); 1805 goto discard_it; 1806 } 1807 1808 tcp_v6_fill_cb(skb, hdr, th); 1809 1810 if (tcp_checksum_complete(skb)) { 1811 inet_twsk_put(inet_twsk(sk)); 1812 goto csum_error; 1813 } 1814 1815 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1816 case TCP_TW_SYN: 1817 { 1818 struct sock *sk2; 1819 1820 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1821 skb, __tcp_hdrlen(th), 1822 &ipv6_hdr(skb)->saddr, th->source, 1823 &ipv6_hdr(skb)->daddr, 1824 ntohs(th->dest), 1825 tcp_v6_iif_l3_slave(skb), 1826 sdif); 1827 if (sk2) { 1828 struct inet_timewait_sock *tw = inet_twsk(sk); 1829 inet_twsk_deschedule_put(tw); 1830 sk = sk2; 1831 tcp_v6_restore_cb(skb); 1832 refcounted = false; 1833 goto process; 1834 } 1835 } 1836 /* to ACK */ 1837 fallthrough; 1838 case TCP_TW_ACK: 1839 tcp_v6_timewait_ack(sk, skb); 1840 break; 1841 case TCP_TW_RST: 1842 tcp_v6_send_reset(sk, skb); 1843 inet_twsk_deschedule_put(inet_twsk(sk)); 1844 goto discard_it; 1845 case TCP_TW_SUCCESS: 1846 ; 1847 } 1848 goto discard_it; 1849 } 1850 1851 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) 1852 { 1853 const struct ipv6hdr *hdr; 1854 const struct tcphdr *th; 1855 struct sock *sk; 1856 1857 if (skb->pkt_type != PACKET_HOST) 1858 return; 1859 1860 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1861 return; 1862 1863 hdr = ipv6_hdr(skb); 1864 th = tcp_hdr(skb); 1865 1866 if (th->doff < sizeof(struct tcphdr) / 4) 1867 return; 1868 1869 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1870 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1871 &hdr->saddr, th->source, 1872 &hdr->daddr, ntohs(th->dest), 1873 inet6_iif(skb), inet6_sdif(skb)); 1874 if (sk) { 1875 skb->sk = sk; 1876 skb->destructor = sock_edemux; 1877 if (sk_fullsock(sk)) { 1878 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1879 1880 if (dst) 1881 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1882 if (dst && 1883 sk->sk_rx_dst_ifindex == skb->skb_iif) 1884 skb_dst_set_noref(skb, dst); 1885 } 1886 } 1887 } 1888 1889 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1890 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1891 .twsk_unique = tcp_twsk_unique, 1892 .twsk_destructor = tcp_twsk_destructor, 1893 }; 1894 1895 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1896 { 1897 struct ipv6_pinfo *np = inet6_sk(sk); 1898 1899 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); 1900 } 1901 1902 const struct inet_connection_sock_af_ops ipv6_specific = { 1903 .queue_xmit = inet6_csk_xmit, 1904 .send_check = tcp_v6_send_check, 1905 .rebuild_header = inet6_sk_rebuild_header, 1906 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1907 .conn_request = tcp_v6_conn_request, 1908 .syn_recv_sock = tcp_v6_syn_recv_sock, 1909 .net_header_len = sizeof(struct 
ipv6hdr), 1910 .net_frag_header_len = sizeof(struct frag_hdr), 1911 .setsockopt = ipv6_setsockopt, 1912 .getsockopt = ipv6_getsockopt, 1913 .addr2sockaddr = inet6_csk_addr2sockaddr, 1914 .sockaddr_len = sizeof(struct sockaddr_in6), 1915 .mtu_reduced = tcp_v6_mtu_reduced, 1916 }; 1917 1918 #ifdef CONFIG_TCP_MD5SIG 1919 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1920 .md5_lookup = tcp_v6_md5_lookup, 1921 .calc_md5_hash = tcp_v6_md5_hash_skb, 1922 .md5_parse = tcp_v6_parse_md5_keys, 1923 }; 1924 #endif 1925 1926 /* 1927 * TCP over IPv4 via INET6 API 1928 */ 1929 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1930 .queue_xmit = ip_queue_xmit, 1931 .send_check = tcp_v4_send_check, 1932 .rebuild_header = inet_sk_rebuild_header, 1933 .sk_rx_dst_set = inet_sk_rx_dst_set, 1934 .conn_request = tcp_v6_conn_request, 1935 .syn_recv_sock = tcp_v6_syn_recv_sock, 1936 .net_header_len = sizeof(struct iphdr), 1937 .setsockopt = ipv6_setsockopt, 1938 .getsockopt = ipv6_getsockopt, 1939 .addr2sockaddr = inet6_csk_addr2sockaddr, 1940 .sockaddr_len = sizeof(struct sockaddr_in6), 1941 .mtu_reduced = tcp_v4_mtu_reduced, 1942 }; 1943 1944 #ifdef CONFIG_TCP_MD5SIG 1945 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1946 .md5_lookup = tcp_v4_md5_lookup, 1947 .calc_md5_hash = tcp_v4_md5_hash_skb, 1948 .md5_parse = tcp_v6_parse_md5_keys, 1949 }; 1950 #endif 1951 1952 /* NOTE: A lot of things set to zero explicitly by call to 1953 * sk_alloc() so need not be done here. 1954 */ 1955 static int tcp_v6_init_sock(struct sock *sk) 1956 { 1957 struct inet_connection_sock *icsk = inet_csk(sk); 1958 1959 tcp_init_sock(sk); 1960 1961 icsk->icsk_af_ops = &ipv6_specific; 1962 1963 #ifdef CONFIG_TCP_MD5SIG 1964 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1965 #endif 1966 1967 return 0; 1968 } 1969 1970 static void tcp_v6_destroy_sock(struct sock *sk) 1971 { 1972 tcp_v4_destroy_sock(sk); 1973 inet6_destroy_sock(sk); 1974 } 1975 1976 #ifdef CONFIG_PROC_FS 1977 /* Proc filesystem TCPv6 sock list dumping. */ 1978 static void get_openreq6(struct seq_file *seq, 1979 const struct request_sock *req, int i) 1980 { 1981 long ttd = req->rsk_timer.expires - jiffies; 1982 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1983 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1984 1985 if (ttd < 0) 1986 ttd = 0; 1987 1988 seq_printf(seq, 1989 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1990 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1991 i, 1992 src->s6_addr32[0], src->s6_addr32[1], 1993 src->s6_addr32[2], src->s6_addr32[3], 1994 inet_rsk(req)->ir_num, 1995 dest->s6_addr32[0], dest->s6_addr32[1], 1996 dest->s6_addr32[2], dest->s6_addr32[3], 1997 ntohs(inet_rsk(req)->ir_rmt_port), 1998 TCP_SYN_RECV, 1999 0, 0, /* could print option size, but that is af dependent. 
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
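
/* Format a TIME-WAIT socket for /proc/net/tcp6.  Most per-connection
 * fields no longer exist in this state, so they are printed as zero.
 */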
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
				  sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
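
/* Handlers registered with the IPv6 stack: tcp_v6_early_demux() is run
 * from the receive path before the routing decision to find an
 * established socket and reuse its cached dst, tcp_v6_rcv() is the main
 * input handler and tcp_v6_err() processes ICMPv6 errors.
 */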
/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
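
/* For orientation: a socket created with socket(AF_INET6, SOCK_STREAM, 0)
 * is bound to tcpv6_prot through the tcpv6_protosw entry registered in
 * tcpv6_init(), so connect() on it ends up in tcp_v6_pre_connect() and
 * tcp_v6_connect(), while inbound segments reach it via tcp_v6_rcv().
 */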