1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/tcp.h> 45 #include <net/ndisc.h> 46 #include <net/inet6_hashtables.h> 47 #include <net/inet6_connection_sock.h> 48 #include <net/ipv6.h> 49 #include <net/transp_v6.h> 50 #include <net/addrconf.h> 51 #include <net/ip6_route.h> 52 #include <net/ip6_checksum.h> 53 #include <net/inet_ecn.h> 54 #include <net/protocol.h> 55 #include <net/xfrm.h> 56 #include <net/snmp.h> 57 #include <net/dsfield.h> 58 #include <net/timewait_sock.h> 59 #include <net/inet_common.h> 60 #include <net/secure_seq.h> 61 #include <net/busy_poll.h> 62 63 #include <linux/proc_fs.h> 64 #include <linux/seq_file.h> 65 66 #include <crypto/hash.h> 67 #include <linux/scatterlist.h> 68 69 #include <trace/events/tcp.h> 70 71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 73 struct request_sock *req); 74 75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 76 77 static const struct inet_connection_sock_af_ops ipv6_mapped; 78 const struct inet_connection_sock_af_ops ipv6_specific; 79 #ifdef CONFIG_TCP_MD5SIG 80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 82 #else 83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 84 const struct in6_addr *addr, 85 int l3index) 86 { 87 return NULL; 88 } 89 #endif 90 91 /* Helper returning the inet6 address from a given tcp socket. 92 * It can be used in TCP stack instead of inet6_sk(sk). 93 * This avoids a dereference and allow compiler optimizations. 94 * It is a specialized version of inet6_sk_generic(). 
95 */ 96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ 97 struct tcp6_sock, tcp)->inet6) 98 99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 100 { 101 struct dst_entry *dst = skb_dst(skb); 102 103 if (dst && dst_hold_safe(dst)) { 104 rcu_assign_pointer(sk->sk_rx_dst, dst); 105 sk->sk_rx_dst_ifindex = skb->skb_iif; 106 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); 107 } 108 } 109 110 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 111 { 112 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 113 ipv6_hdr(skb)->saddr.s6_addr32, 114 tcp_hdr(skb)->dest, 115 tcp_hdr(skb)->source); 116 } 117 118 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 119 { 120 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 121 ipv6_hdr(skb)->saddr.s6_addr32); 122 } 123 124 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 125 int addr_len) 126 { 127 /* This check is replicated from tcp_v6_connect() and intended to 128 * prevent BPF program called below from accessing bytes that are out 129 * of the bound specified by user in addr_len. 130 */ 131 if (addr_len < SIN6_LEN_RFC2133) 132 return -EINVAL; 133 134 sock_owned_by_me(sk); 135 136 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); 137 } 138 139 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 140 int addr_len) 141 { 142 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 143 struct inet_connection_sock *icsk = inet_csk(sk); 144 struct in6_addr *saddr = NULL, *final_p, final; 145 struct inet_timewait_death_row *tcp_death_row; 146 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 147 struct inet_sock *inet = inet_sk(sk); 148 struct tcp_sock *tp = tcp_sk(sk); 149 struct net *net = sock_net(sk); 150 struct ipv6_txoptions *opt; 151 struct dst_entry *dst; 152 struct flowi6 fl6; 153 int addr_type; 154 int err; 155 156 if (addr_len < SIN6_LEN_RFC2133) 157 return -EINVAL; 158 159 if (usin->sin6_family != AF_INET6) 160 return -EAFNOSUPPORT; 161 162 memset(&fl6, 0, sizeof(fl6)); 163 164 if (np->sndflow) { 165 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 166 IP6_ECN_flow_init(fl6.flowlabel); 167 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 168 struct ip6_flowlabel *flowlabel; 169 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 170 if (IS_ERR(flowlabel)) 171 return -EINVAL; 172 fl6_sock_release(flowlabel); 173 } 174 } 175 176 /* 177 * connect() to INADDR_ANY means loopback (BSD'ism). 178 */ 179 180 if (ipv6_addr_any(&usin->sin6_addr)) { 181 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 182 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 183 &usin->sin6_addr); 184 else 185 usin->sin6_addr = in6addr_loopback; 186 } 187 188 addr_type = ipv6_addr_type(&usin->sin6_addr); 189 190 if (addr_type & IPV6_ADDR_MULTICAST) 191 return -ENETUNREACH; 192 193 if (addr_type&IPV6_ADDR_LINKLOCAL) { 194 if (addr_len >= sizeof(struct sockaddr_in6) && 195 usin->sin6_scope_id) { 196 /* If interface is set while binding, indices 197 * must coincide. 
198 */ 199 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 200 return -EINVAL; 201 202 sk->sk_bound_dev_if = usin->sin6_scope_id; 203 } 204 205 /* Connect to link-local address requires an interface */ 206 if (!sk->sk_bound_dev_if) 207 return -EINVAL; 208 } 209 210 if (tp->rx_opt.ts_recent_stamp && 211 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 212 tp->rx_opt.ts_recent = 0; 213 tp->rx_opt.ts_recent_stamp = 0; 214 WRITE_ONCE(tp->write_seq, 0); 215 } 216 217 sk->sk_v6_daddr = usin->sin6_addr; 218 np->flow_label = fl6.flowlabel; 219 220 /* 221 * TCP over IPv4 222 */ 223 224 if (addr_type & IPV6_ADDR_MAPPED) { 225 u32 exthdrlen = icsk->icsk_ext_hdr_len; 226 struct sockaddr_in sin; 227 228 if (ipv6_only_sock(sk)) 229 return -ENETUNREACH; 230 231 sin.sin_family = AF_INET; 232 sin.sin_port = usin->sin6_port; 233 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 234 235 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 236 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); 237 if (sk_is_mptcp(sk)) 238 mptcpv6_handle_mapped(sk, true); 239 sk->sk_backlog_rcv = tcp_v4_do_rcv; 240 #ifdef CONFIG_TCP_MD5SIG 241 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 242 #endif 243 244 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 245 246 if (err) { 247 icsk->icsk_ext_hdr_len = exthdrlen; 248 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 249 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); 250 if (sk_is_mptcp(sk)) 251 mptcpv6_handle_mapped(sk, false); 252 sk->sk_backlog_rcv = tcp_v6_do_rcv; 253 #ifdef CONFIG_TCP_MD5SIG 254 tp->af_specific = &tcp_sock_ipv6_specific; 255 #endif 256 goto failure; 257 } 258 np->saddr = sk->sk_v6_rcv_saddr; 259 260 return err; 261 } 262 263 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 264 saddr = &sk->sk_v6_rcv_saddr; 265 266 fl6.flowi6_proto = IPPROTO_TCP; 267 fl6.daddr = sk->sk_v6_daddr; 268 fl6.saddr = saddr ? 
*saddr : np->saddr; 269 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); 270 fl6.flowi6_oif = sk->sk_bound_dev_if; 271 fl6.flowi6_mark = sk->sk_mark; 272 fl6.fl6_dport = usin->sin6_port; 273 fl6.fl6_sport = inet->inet_sport; 274 fl6.flowi6_uid = sk->sk_uid; 275 276 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 277 final_p = fl6_update_dst(&fl6, opt, &final); 278 279 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 280 281 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); 282 if (IS_ERR(dst)) { 283 err = PTR_ERR(dst); 284 goto failure; 285 } 286 287 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 292 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); 293 if (err) 294 goto failure; 295 } 296 297 /* set the source address */ 298 np->saddr = *saddr; 299 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 300 301 sk->sk_gso_type = SKB_GSO_TCPV6; 302 ip6_dst_store(sk, dst, NULL, NULL); 303 304 icsk->icsk_ext_hdr_len = 0; 305 if (opt) 306 icsk->icsk_ext_hdr_len = opt->opt_flen + 307 opt->opt_nflen; 308 309 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 310 311 inet->inet_dport = usin->sin6_port; 312 313 tcp_set_state(sk, TCP_SYN_SENT); 314 err = inet6_hash_connect(tcp_death_row, sk); 315 if (err) 316 goto late_failure; 317 318 sk_set_txhash(sk); 319 320 if (likely(!tp->repair)) { 321 if (!tp->write_seq) 322 WRITE_ONCE(tp->write_seq, 323 secure_tcpv6_seq(np->saddr.s6_addr32, 324 sk->sk_v6_daddr.s6_addr32, 325 inet->inet_sport, 326 inet->inet_dport)); 327 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, 328 sk->sk_v6_daddr.s6_addr32); 329 } 330 331 if (tcp_fastopen_defer_connect(sk, &err)) 332 return err; 333 if (err) 334 goto late_failure; 335 336 err = tcp_connect(sk); 337 if (err) 338 goto late_failure; 339 340 return 0; 341 342 late_failure: 343 tcp_set_state(sk, TCP_CLOSE); 344 inet_bhash2_reset_saddr(sk); 345 failure: 346 inet->inet_dport = 0; 347 sk->sk_route_caps = 0; 348 return err; 349 } 350 351 static void tcp_v6_mtu_reduced(struct sock *sk) 352 { 353 struct dst_entry *dst; 354 u32 mtu; 355 356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 357 return; 358 359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 360 361 /* Drop requests trying to increase our current mss. 362 * Check done in __ip6_rt_update_pmtu() is too late. 
363 */ 364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) 365 return; 366 367 dst = inet6_csk_update_pmtu(sk, mtu); 368 if (!dst) 369 return; 370 371 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 372 tcp_sync_mss(sk, dst_mtu(dst)); 373 tcp_simple_retransmit(sk); 374 } 375 } 376 377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 378 u8 type, u8 code, int offset, __be32 info) 379 { 380 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; 381 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 382 struct net *net = dev_net(skb->dev); 383 struct request_sock *fastopen; 384 struct ipv6_pinfo *np; 385 struct tcp_sock *tp; 386 __u32 seq, snd_una; 387 struct sock *sk; 388 bool fatal; 389 int err; 390 391 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, 392 &hdr->daddr, th->dest, 393 &hdr->saddr, ntohs(th->source), 394 skb->dev->ifindex, inet6_sdif(skb)); 395 396 if (!sk) { 397 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 398 ICMP6_MIB_INERRORS); 399 return -ENOENT; 400 } 401 402 if (sk->sk_state == TCP_TIME_WAIT) { 403 inet_twsk_put(inet_twsk(sk)); 404 return 0; 405 } 406 seq = ntohl(th->seq); 407 fatal = icmpv6_err_convert(type, code, &err); 408 if (sk->sk_state == TCP_NEW_SYN_RECV) { 409 tcp_req_err(sk, seq, fatal); 410 return 0; 411 } 412 413 bh_lock_sock(sk); 414 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 415 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 416 417 if (sk->sk_state == TCP_CLOSE) 418 goto out; 419 420 if (static_branch_unlikely(&ip6_min_hopcount)) { 421 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 422 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 423 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 424 goto out; 425 } 426 } 427 428 tp = tcp_sk(sk); 429 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 430 fastopen = rcu_dereference(tp->fastopen_rsk); 431 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 432 if (sk->sk_state != TCP_LISTEN && 433 !between(seq, snd_una, tp->snd_nxt)) { 434 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 435 goto out; 436 } 437 438 np = tcp_inet6_sk(sk); 439 440 if (type == NDISC_REDIRECT) { 441 if (!sock_owned_by_user(sk)) { 442 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 443 444 if (dst) 445 dst->ops->redirect(dst, sk, skb); 446 } 447 goto out; 448 } 449 450 if (type == ICMPV6_PKT_TOOBIG) { 451 u32 mtu = ntohl(info); 452 453 /* We are not interested in TCP_LISTEN and open_requests 454 * (SYN-ACKs send out by Linux are always <576bytes so 455 * they should go through unfragmented). 456 */ 457 if (sk->sk_state == TCP_LISTEN) 458 goto out; 459 460 if (!ip6_sk_accept_pmtu(sk)) 461 goto out; 462 463 if (mtu < IPV6_MIN_MTU) 464 goto out; 465 466 WRITE_ONCE(tp->mtu_info, mtu); 467 468 if (!sock_owned_by_user(sk)) 469 tcp_v6_mtu_reduced(sk); 470 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 471 &sk->sk_tsq_flags)) 472 sock_hold(sk); 473 goto out; 474 } 475 476 477 /* Might be for an request_sock */ 478 switch (sk->sk_state) { 479 case TCP_SYN_SENT: 480 case TCP_SYN_RECV: 481 /* Only in fast or simultaneous open. If a fast open socket is 482 * already accepted it is treated as a connected one below. 
483 */ 484 if (fastopen && !fastopen->sk) 485 break; 486 487 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 488 489 if (!sock_owned_by_user(sk)) 490 tcp_done_with_error(sk, err); 491 else 492 WRITE_ONCE(sk->sk_err_soft, err); 493 goto out; 494 case TCP_LISTEN: 495 break; 496 default: 497 /* check if this ICMP message allows revert of backoff. 498 * (see RFC 6069) 499 */ 500 if (!fastopen && type == ICMPV6_DEST_UNREACH && 501 code == ICMPV6_NOROUTE) 502 tcp_ld_RTO_revert(sk, seq); 503 } 504 505 if (!sock_owned_by_user(sk) && np->recverr) { 506 WRITE_ONCE(sk->sk_err, err); 507 sk_error_report(sk); 508 } else { 509 WRITE_ONCE(sk->sk_err_soft, err); 510 } 511 out: 512 bh_unlock_sock(sk); 513 sock_put(sk); 514 return 0; 515 } 516 517 518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 519 struct flowi *fl, 520 struct request_sock *req, 521 struct tcp_fastopen_cookie *foc, 522 enum tcp_synack_type synack_type, 523 struct sk_buff *syn_skb) 524 { 525 struct inet_request_sock *ireq = inet_rsk(req); 526 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 527 struct ipv6_txoptions *opt; 528 struct flowi6 *fl6 = &fl->u.ip6; 529 struct sk_buff *skb; 530 int err = -ENOMEM; 531 u8 tclass; 532 533 /* First, grab a route. */ 534 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 535 IPPROTO_TCP)) == NULL) 536 goto done; 537 538 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 539 540 if (skb) { 541 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 542 &ireq->ir_v6_rmt_addr); 543 544 fl6->daddr = ireq->ir_v6_rmt_addr; 545 if (np->repflow && ireq->pktopts) 546 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 547 548 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 549 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 550 (np->tclass & INET_ECN_MASK) : 551 np->tclass; 552 553 if (!INET_ECN_is_capable(tclass) && 554 tcp_bpf_ca_needs_ecn((struct sock *)req)) 555 tclass |= INET_ECN_ECT_0; 556 557 rcu_read_lock(); 558 opt = ireq->ipv6_opt; 559 if (!opt) 560 opt = rcu_dereference(np->opt); 561 err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: READ_ONCE(sk->sk_mark), 562 opt, tclass, sk->sk_priority); 563 rcu_read_unlock(); 564 err = net_xmit_eval(err); 565 } 566 567 done: 568 return err; 569 } 570 571 572 static void tcp_v6_reqsk_destructor(struct request_sock *req) 573 { 574 kfree(inet_rsk(req)->ipv6_opt); 575 consume_skb(inet_rsk(req)->pktopts); 576 } 577 578 #ifdef CONFIG_TCP_MD5SIG 579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 580 const struct in6_addr *addr, 581 int l3index) 582 { 583 return tcp_md5_do_lookup(sk, l3index, 584 (union tcp_md5_addr *)addr, AF_INET6); 585 } 586 587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 588 const struct sock *addr_sk) 589 { 590 int l3index; 591 592 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 593 addr_sk->sk_bound_dev_if); 594 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 595 l3index); 596 } 597 598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 599 sockptr_t optval, int optlen) 600 { 601 struct tcp_md5sig cmd; 602 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 603 int l3index = 0; 604 u8 prefixlen; 605 u8 flags; 606 607 if (optlen < sizeof(cmd)) 608 return -EINVAL; 609 610 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 611 return -EFAULT; 612 613 if (sin6->sin6_family != AF_INET6) 614 return -EINVAL; 615 616 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 617 618 if (optname == TCP_MD5SIG_EXT && 619 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 620 prefixlen = cmd.tcpm_prefixlen; 621 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 622 prefixlen > 32)) 623 return -EINVAL; 624 } else { 625 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 626 } 627 628 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 630 struct net_device *dev; 631 632 rcu_read_lock(); 633 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 634 if (dev && netif_is_l3_master(dev)) 635 l3index = dev->ifindex; 636 rcu_read_unlock(); 637 638 /* ok to reference set/not set outside of rcu; 639 * right now device MUST be an L3 master 640 */ 641 if (!dev || !l3index) 642 return -EINVAL; 643 } 644 645 if (!cmd.tcpm_keylen) { 646 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 647 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 648 AF_INET, prefixlen, 649 l3index, flags); 650 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 651 AF_INET6, prefixlen, l3index, flags); 652 } 653 654 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 655 return -EINVAL; 656 657 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 658 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 659 AF_INET, prefixlen, l3index, flags, 660 cmd.tcpm_key, cmd.tcpm_keylen); 661 662 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 663 AF_INET6, prefixlen, l3index, flags, 664 cmd.tcpm_key, cmd.tcpm_keylen); 665 } 666 667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 668 const struct in6_addr *daddr, 669 const struct in6_addr *saddr, 670 const struct tcphdr *th, int nbytes) 671 { 672 struct tcp6_pseudohdr *bp; 673 struct scatterlist sg; 674 struct tcphdr *_th; 675 676 bp = hp->scratch; 677 /* 1. 
TCP pseudo-header (RFC2460) */ 678 bp->saddr = *saddr; 679 bp->daddr = *daddr; 680 bp->protocol = cpu_to_be32(IPPROTO_TCP); 681 bp->len = cpu_to_be32(nbytes); 682 683 _th = (struct tcphdr *)(bp + 1); 684 memcpy(_th, th, sizeof(*th)); 685 _th->check = 0; 686 687 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 688 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 689 sizeof(*bp) + sizeof(*th)); 690 return crypto_ahash_update(hp->md5_req); 691 } 692 693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 694 const struct in6_addr *daddr, struct in6_addr *saddr, 695 const struct tcphdr *th) 696 { 697 struct tcp_md5sig_pool *hp; 698 struct ahash_request *req; 699 700 hp = tcp_get_md5sig_pool(); 701 if (!hp) 702 goto clear_hash_noput; 703 req = hp->md5_req; 704 705 if (crypto_ahash_init(req)) 706 goto clear_hash; 707 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 708 goto clear_hash; 709 if (tcp_md5_hash_key(hp, key)) 710 goto clear_hash; 711 ahash_request_set_crypt(req, NULL, md5_hash, 0); 712 if (crypto_ahash_final(req)) 713 goto clear_hash; 714 715 tcp_put_md5sig_pool(); 716 return 0; 717 718 clear_hash: 719 tcp_put_md5sig_pool(); 720 clear_hash_noput: 721 memset(md5_hash, 0, 16); 722 return 1; 723 } 724 725 static int tcp_v6_md5_hash_skb(char *md5_hash, 726 const struct tcp_md5sig_key *key, 727 const struct sock *sk, 728 const struct sk_buff *skb) 729 { 730 const struct in6_addr *saddr, *daddr; 731 struct tcp_md5sig_pool *hp; 732 struct ahash_request *req; 733 const struct tcphdr *th = tcp_hdr(skb); 734 735 if (sk) { /* valid for establish/request sockets */ 736 saddr = &sk->sk_v6_rcv_saddr; 737 daddr = &sk->sk_v6_daddr; 738 } else { 739 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 740 saddr = &ip6h->saddr; 741 daddr = &ip6h->daddr; 742 } 743 744 hp = tcp_get_md5sig_pool(); 745 if (!hp) 746 goto clear_hash_noput; 747 req = hp->md5_req; 748 749 if (crypto_ahash_init(req)) 750 goto clear_hash; 751 752 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 753 goto clear_hash; 754 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 755 goto clear_hash; 756 if (tcp_md5_hash_key(hp, key)) 757 goto clear_hash; 758 ahash_request_set_crypt(req, NULL, md5_hash, 0); 759 if (crypto_ahash_final(req)) 760 goto clear_hash; 761 762 tcp_put_md5sig_pool(); 763 return 0; 764 765 clear_hash: 766 tcp_put_md5sig_pool(); 767 clear_hash_noput: 768 memset(md5_hash, 0, 16); 769 return 1; 770 } 771 772 #endif 773 774 static void tcp_v6_init_req(struct request_sock *req, 775 const struct sock *sk_listener, 776 struct sk_buff *skb) 777 { 778 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 779 struct inet_request_sock *ireq = inet_rsk(req); 780 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 781 782 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 783 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 784 785 /* So that link locals have meaning */ 786 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 787 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 788 ireq->ir_iif = tcp_v6_iif(skb); 789 790 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 791 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 792 np->rxopt.bits.rxinfo || 793 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 794 np->rxopt.bits.rxohlim || np->repflow)) { 795 refcount_inc(&skb->users); 796 ireq->pktopts = skb; 797 } 798 } 799 800 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 801 struct sk_buff *skb, 802 struct flowi *fl, 803 struct request_sock 
*req) 804 { 805 tcp_v6_init_req(req, sk, skb); 806 807 if (security_inet_conn_request(sk, skb, req)) 808 return NULL; 809 810 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 811 } 812 813 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 814 .family = AF_INET6, 815 .obj_size = sizeof(struct tcp6_request_sock), 816 .rtx_syn_ack = tcp_rtx_synack, 817 .send_ack = tcp_v6_reqsk_send_ack, 818 .destructor = tcp_v6_reqsk_destructor, 819 .send_reset = tcp_v6_send_reset, 820 .syn_ack_timeout = tcp_syn_ack_timeout, 821 }; 822 823 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 824 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 825 sizeof(struct ipv6hdr), 826 #ifdef CONFIG_TCP_MD5SIG 827 .req_md5_lookup = tcp_v6_md5_lookup, 828 .calc_md5_hash = tcp_v6_md5_hash_skb, 829 #endif 830 #ifdef CONFIG_SYN_COOKIES 831 .cookie_init_seq = cookie_v6_init_sequence, 832 #endif 833 .route_req = tcp_v6_route_req, 834 .init_seq = tcp_v6_init_seq, 835 .init_ts_off = tcp_v6_init_ts_off, 836 .send_synack = tcp_v6_send_synack, 837 }; 838 839 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 840 u32 ack, u32 win, u32 tsval, u32 tsecr, 841 int oif, struct tcp_md5sig_key *key, int rst, 842 u8 tclass, __be32 label, u32 priority, u32 txhash) 843 { 844 const struct tcphdr *th = tcp_hdr(skb); 845 struct tcphdr *t1; 846 struct sk_buff *buff; 847 struct flowi6 fl6; 848 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 849 struct sock *ctl_sk = net->ipv6.tcp_sk; 850 unsigned int tot_len = sizeof(struct tcphdr); 851 __be32 mrst = 0, *topt; 852 struct dst_entry *dst; 853 __u32 mark = 0; 854 855 if (tsecr) 856 tot_len += TCPOLEN_TSTAMP_ALIGNED; 857 #ifdef CONFIG_TCP_MD5SIG 858 if (key) 859 tot_len += TCPOLEN_MD5SIG_ALIGNED; 860 #endif 861 862 #ifdef CONFIG_MPTCP 863 if (rst && !key) { 864 mrst = mptcp_reset_option(skb); 865 866 if (mrst) 867 tot_len += sizeof(__be32); 868 } 869 #endif 870 871 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 872 if (!buff) 873 return; 874 875 skb_reserve(buff, MAX_TCP_HEADER); 876 877 t1 = skb_push(buff, tot_len); 878 skb_reset_transport_header(buff); 879 880 /* Swap the send and the receive. 
*/ 881 memset(t1, 0, sizeof(*t1)); 882 t1->dest = th->source; 883 t1->source = th->dest; 884 t1->doff = tot_len / 4; 885 t1->seq = htonl(seq); 886 t1->ack_seq = htonl(ack); 887 t1->ack = !rst || !th->ack; 888 t1->rst = rst; 889 t1->window = htons(win); 890 891 topt = (__be32 *)(t1 + 1); 892 893 if (tsecr) { 894 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 895 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 896 *topt++ = htonl(tsval); 897 *topt++ = htonl(tsecr); 898 } 899 900 if (mrst) 901 *topt++ = mrst; 902 903 #ifdef CONFIG_TCP_MD5SIG 904 if (key) { 905 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 906 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 907 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 908 &ipv6_hdr(skb)->saddr, 909 &ipv6_hdr(skb)->daddr, t1); 910 } 911 #endif 912 913 memset(&fl6, 0, sizeof(fl6)); 914 fl6.daddr = ipv6_hdr(skb)->saddr; 915 fl6.saddr = ipv6_hdr(skb)->daddr; 916 fl6.flowlabel = label; 917 918 buff->ip_summed = CHECKSUM_PARTIAL; 919 920 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 921 922 fl6.flowi6_proto = IPPROTO_TCP; 923 if (rt6_need_strict(&fl6.daddr) && !oif) 924 fl6.flowi6_oif = tcp_v6_iif(skb); 925 else { 926 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 927 oif = skb->skb_iif; 928 929 fl6.flowi6_oif = oif; 930 } 931 932 if (sk) { 933 if (sk->sk_state == TCP_TIME_WAIT) 934 mark = inet_twsk(sk)->tw_mark; 935 else 936 mark = READ_ONCE(sk->sk_mark); 937 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 938 } 939 if (txhash) { 940 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 941 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 942 } 943 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 944 fl6.fl6_dport = t1->dest; 945 fl6.fl6_sport = t1->source; 946 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 947 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 948 949 /* Pass a socket to ip6_dst_lookup either it is for RST 950 * Underlying function will use this to retrieve the network 951 * namespace 952 */ 953 if (sk && sk->sk_state != TCP_TIME_WAIT) 954 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 955 else 956 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 957 if (!IS_ERR(dst)) { 958 skb_dst_set(buff, dst); 959 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 960 tclass & ~INET_ECN_MASK, priority); 961 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 962 if (rst) 963 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 964 return; 965 } 966 967 kfree_skb(buff); 968 } 969 970 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 971 { 972 const struct tcphdr *th = tcp_hdr(skb); 973 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 974 u32 seq = 0, ack_seq = 0; 975 struct tcp_md5sig_key *key = NULL; 976 #ifdef CONFIG_TCP_MD5SIG 977 const __u8 *hash_location = NULL; 978 unsigned char newhash[16]; 979 int genhash; 980 struct sock *sk1 = NULL; 981 #endif 982 __be32 label = 0; 983 u32 priority = 0; 984 struct net *net; 985 u32 txhash = 0; 986 int oif = 0; 987 988 if (th->rst) 989 return; 990 991 /* If sk not NULL, it means we did a successful lookup and incoming 992 * route had to be correct. prequeue might have dropped our dst. 993 */ 994 if (!sk && !ipv6_unicast_destination(skb)) 995 return; 996 997 net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 998 #ifdef CONFIG_TCP_MD5SIG 999 rcu_read_lock(); 1000 hash_location = tcp_parse_md5sig_option(th); 1001 if (sk && sk_fullsock(sk)) { 1002 int l3index; 1003 1004 /* sdif set, means packet ingressed via a device 1005 * in an L3 domain and inet_iif is set to it. 1006 */ 1007 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1008 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1009 } else if (hash_location) { 1010 int dif = tcp_v6_iif_l3_slave(skb); 1011 int sdif = tcp_v6_sdif(skb); 1012 int l3index; 1013 1014 /* 1015 * active side is lost. Try to find listening socket through 1016 * source port, and then find md5 key through listening socket. 1017 * we are not loose security here: 1018 * Incoming packet is checked with md5 hash with finding key, 1019 * no RST generated if md5 hash doesn't match. 1020 */ 1021 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 1022 NULL, 0, &ipv6h->saddr, th->source, 1023 &ipv6h->daddr, ntohs(th->source), 1024 dif, sdif); 1025 if (!sk1) 1026 goto out; 1027 1028 /* sdif set, means packet ingressed via a device 1029 * in an L3 domain and dif is set to it. 1030 */ 1031 l3index = tcp_v6_sdif(skb) ? dif : 0; 1032 1033 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1034 if (!key) 1035 goto out; 1036 1037 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1038 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1039 goto out; 1040 } 1041 #endif 1042 1043 if (th->ack) 1044 seq = ntohl(th->ack_seq); 1045 else 1046 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1047 (th->doff << 2); 1048 1049 if (sk) { 1050 oif = sk->sk_bound_dev_if; 1051 if (sk_fullsock(sk)) { 1052 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1053 1054 trace_tcp_send_reset(sk, skb); 1055 if (np->repflow) 1056 label = ip6_flowlabel(ipv6h); 1057 priority = sk->sk_priority; 1058 txhash = sk->sk_txhash; 1059 } 1060 if (sk->sk_state == TCP_TIME_WAIT) { 1061 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1062 priority = inet_twsk(sk)->tw_priority; 1063 txhash = inet_twsk(sk)->tw_txhash; 1064 } 1065 } else { 1066 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1067 label = ip6_flowlabel(ipv6h); 1068 } 1069 1070 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1071 ipv6_get_dsfield(ipv6h), label, priority, txhash); 1072 1073 #ifdef CONFIG_TCP_MD5SIG 1074 out: 1075 rcu_read_unlock(); 1076 #endif 1077 } 1078 1079 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1080 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1081 struct tcp_md5sig_key *key, u8 tclass, 1082 __be32 label, u32 priority, u32 txhash) 1083 { 1084 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1085 tclass, label, priority, txhash); 1086 } 1087 1088 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1089 { 1090 struct inet_timewait_sock *tw = inet_twsk(sk); 1091 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1092 1093 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1094 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1095 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1096 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1097 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, 1098 tw->tw_txhash); 1099 1100 inet_twsk_put(tw); 1101 } 1102 1103 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1104 struct request_sock *req) 1105 { 1106 int l3index; 1107 1108 l3index 
= tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1109 1110 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1111 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1112 */ 1113 /* RFC 7323 2.3 1114 * The window field (SEG.WND) of every outgoing segment, with the 1115 * exception of <SYN> segments, MUST be right-shifted by 1116 * Rcv.Wind.Shift bits: 1117 */ 1118 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1119 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1120 tcp_rsk(req)->rcv_nxt, 1121 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1122 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1123 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, 1124 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1125 ipv6_get_dsfield(ipv6_hdr(skb)), 0, 1126 READ_ONCE(sk->sk_priority), 1127 READ_ONCE(tcp_rsk(req)->txhash)); 1128 } 1129 1130 1131 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1132 { 1133 #ifdef CONFIG_SYN_COOKIES 1134 const struct tcphdr *th = tcp_hdr(skb); 1135 1136 if (!th->syn) 1137 sk = cookie_v6_check(sk, skb); 1138 #endif 1139 return sk; 1140 } 1141 1142 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1143 struct tcphdr *th, u32 *cookie) 1144 { 1145 u16 mss = 0; 1146 #ifdef CONFIG_SYN_COOKIES 1147 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1148 &tcp_request_sock_ipv6_ops, sk, th); 1149 if (mss) { 1150 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1151 tcp_synq_overflow(sk); 1152 } 1153 #endif 1154 return mss; 1155 } 1156 1157 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1158 { 1159 if (skb->protocol == htons(ETH_P_IP)) 1160 return tcp_v4_conn_request(sk, skb); 1161 1162 if (!ipv6_unicast_destination(skb)) 1163 goto drop; 1164 1165 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1166 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1167 return 0; 1168 } 1169 1170 return tcp_conn_request(&tcp6_request_sock_ops, 1171 &tcp_request_sock_ipv6_ops, sk, skb); 1172 1173 drop: 1174 tcp_listendrop(sk); 1175 return 0; /* don't send reset */ 1176 } 1177 1178 static void tcp_v6_restore_cb(struct sk_buff *skb) 1179 { 1180 /* We need to move header back to the beginning if xfrm6_policy_check() 1181 * and tcp_v6_fill_cb() are going to be called again. 1182 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1183 */ 1184 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1185 sizeof(struct inet6_skb_parm)); 1186 } 1187 1188 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1189 struct request_sock *req, 1190 struct dst_entry *dst, 1191 struct request_sock *req_unhash, 1192 bool *own_req) 1193 { 1194 struct inet_request_sock *ireq; 1195 struct ipv6_pinfo *newnp; 1196 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1197 struct ipv6_txoptions *opt; 1198 struct inet_sock *newinet; 1199 bool found_dup_sk = false; 1200 struct tcp_sock *newtp; 1201 struct sock *newsk; 1202 #ifdef CONFIG_TCP_MD5SIG 1203 struct tcp_md5sig_key *key; 1204 int l3index; 1205 #endif 1206 struct flowi6 fl6; 1207 1208 if (skb->protocol == htons(ETH_P_IP)) { 1209 /* 1210 * v6 mapped 1211 */ 1212 1213 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1214 req_unhash, own_req); 1215 1216 if (!newsk) 1217 return NULL; 1218 1219 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1220 1221 newnp = tcp_inet6_sk(newsk); 1222 newtp = tcp_sk(newsk); 1223 1224 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1225 1226 newnp->saddr = newsk->sk_v6_rcv_saddr; 1227 1228 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1229 if (sk_is_mptcp(newsk)) 1230 mptcpv6_handle_mapped(newsk, true); 1231 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1232 #ifdef CONFIG_TCP_MD5SIG 1233 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1234 #endif 1235 1236 newnp->ipv6_mc_list = NULL; 1237 newnp->ipv6_ac_list = NULL; 1238 newnp->ipv6_fl_list = NULL; 1239 newnp->pktoptions = NULL; 1240 newnp->opt = NULL; 1241 newnp->mcast_oif = inet_iif(skb); 1242 newnp->mcast_hops = ip_hdr(skb)->ttl; 1243 newnp->rcv_flowinfo = 0; 1244 if (np->repflow) 1245 newnp->flow_label = 0; 1246 1247 /* 1248 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1249 * here, tcp_create_openreq_child now does this for us, see the comment in 1250 * that function for the gory details. -acme 1251 */ 1252 1253 /* It is tricky place. Until this moment IPv4 tcp 1254 worked with IPv6 icsk.icsk_af_ops. 1255 Sync it now. 1256 */ 1257 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1258 1259 return newsk; 1260 } 1261 1262 ireq = inet_rsk(req); 1263 1264 if (sk_acceptq_is_full(sk)) 1265 goto out_overflow; 1266 1267 if (!dst) { 1268 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1269 if (!dst) 1270 goto out; 1271 } 1272 1273 newsk = tcp_create_openreq_child(sk, req, skb); 1274 if (!newsk) 1275 goto out_nonewsk; 1276 1277 /* 1278 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1279 * count here, tcp_create_openreq_child now does this for us, see the 1280 * comment in that function for the gory details. -acme 1281 */ 1282 1283 newsk->sk_gso_type = SKB_GSO_TCPV6; 1284 inet6_sk_rx_dst_set(newsk, skb); 1285 1286 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1287 1288 newtp = tcp_sk(newsk); 1289 newinet = inet_sk(newsk); 1290 newnp = tcp_inet6_sk(newsk); 1291 1292 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1293 1294 ip6_dst_store(newsk, dst, NULL, NULL); 1295 1296 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1297 newnp->saddr = ireq->ir_v6_loc_addr; 1298 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1299 newsk->sk_bound_dev_if = ireq->ir_iif; 1300 1301 /* Now IPv6 options... 1302 1303 First: no IPv4 options. 
1304 */ 1305 newinet->inet_opt = NULL; 1306 newnp->ipv6_mc_list = NULL; 1307 newnp->ipv6_ac_list = NULL; 1308 newnp->ipv6_fl_list = NULL; 1309 1310 /* Clone RX bits */ 1311 newnp->rxopt.all = np->rxopt.all; 1312 1313 newnp->pktoptions = NULL; 1314 newnp->opt = NULL; 1315 newnp->mcast_oif = tcp_v6_iif(skb); 1316 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1317 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1318 if (np->repflow) 1319 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1320 1321 /* Set ToS of the new socket based upon the value of incoming SYN. 1322 * ECT bits are set later in tcp_init_transfer(). 1323 */ 1324 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1325 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1326 1327 /* Clone native IPv6 options from listening socket (if any) 1328 1329 Yes, keeping reference count would be much more clever, 1330 but we make one more one thing there: reattach optmem 1331 to newsk. 1332 */ 1333 opt = ireq->ipv6_opt; 1334 if (!opt) 1335 opt = rcu_dereference(np->opt); 1336 if (opt) { 1337 opt = ipv6_dup_options(newsk, opt); 1338 RCU_INIT_POINTER(newnp->opt, opt); 1339 } 1340 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1341 if (opt) 1342 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1343 opt->opt_flen; 1344 1345 tcp_ca_openreq_child(newsk, dst); 1346 1347 tcp_sync_mss(newsk, dst_mtu(dst)); 1348 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1349 1350 tcp_initialize_rcv_mss(newsk); 1351 1352 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1353 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1354 1355 #ifdef CONFIG_TCP_MD5SIG 1356 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1357 1358 /* Copy over the MD5 key from the original socket */ 1359 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1360 if (key) { 1361 const union tcp_md5_addr *addr; 1362 1363 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1364 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { 1365 inet_csk_prepare_forced_close(newsk); 1366 tcp_done(newsk); 1367 goto out; 1368 } 1369 } 1370 #endif 1371 1372 if (__inet_inherit_port(sk, newsk) < 0) { 1373 inet_csk_prepare_forced_close(newsk); 1374 tcp_done(newsk); 1375 goto out; 1376 } 1377 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1378 &found_dup_sk); 1379 if (*own_req) { 1380 tcp_move_syn(newtp, req); 1381 1382 /* Clone pktoptions received with SYN, if we own the req */ 1383 if (ireq->pktopts) { 1384 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1385 consume_skb(ireq->pktopts); 1386 ireq->pktopts = NULL; 1387 if (newnp->pktoptions) 1388 tcp_v6_restore_cb(newnp->pktoptions); 1389 } 1390 } else { 1391 if (!req_unhash && found_dup_sk) { 1392 /* This code path should only be executed in the 1393 * syncookie case only 1394 */ 1395 bh_unlock_sock(newsk); 1396 sock_put(newsk); 1397 newsk = NULL; 1398 } 1399 } 1400 1401 return newsk; 1402 1403 out_overflow: 1404 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1405 out_nonewsk: 1406 dst_release(dst); 1407 out: 1408 tcp_listendrop(sk); 1409 return NULL; 1410 } 1411 1412 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1413 u32)); 1414 /* The socket must have it's spinlock held when we get 1415 * here, unless it is a TCP_LISTEN socket. 1416 * 1417 * We have a potential double-lock case here, so even when 1418 * doing backlog processing we use the BH locking scheme. 
1419 * This is because we cannot sleep with the original spinlock 1420 * held. 1421 */ 1422 INDIRECT_CALLABLE_SCOPE 1423 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1424 { 1425 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1426 struct sk_buff *opt_skb = NULL; 1427 enum skb_drop_reason reason; 1428 struct tcp_sock *tp; 1429 1430 /* Imagine: socket is IPv6. IPv4 packet arrives, 1431 goes to IPv4 receive handler and backlogged. 1432 From backlog it always goes here. Kerboom... 1433 Fortunately, tcp_rcv_established and rcv_established 1434 handle them correctly, but it is not case with 1435 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1436 */ 1437 1438 if (skb->protocol == htons(ETH_P_IP)) 1439 return tcp_v4_do_rcv(sk, skb); 1440 1441 /* 1442 * socket locking is here for SMP purposes as backlog rcv 1443 * is currently called with bh processing disabled. 1444 */ 1445 1446 /* Do Stevens' IPV6_PKTOPTIONS. 1447 1448 Yes, guys, it is the only place in our code, where we 1449 may make it not affecting IPv4. 1450 The rest of code is protocol independent, 1451 and I do not like idea to uglify IPv4. 1452 1453 Actually, all the idea behind IPV6_PKTOPTIONS 1454 looks not very well thought. For now we latch 1455 options, received in the last packet, enqueued 1456 by tcp. Feel free to propose better solution. 1457 --ANK (980728) 1458 */ 1459 if (np->rxopt.all) 1460 opt_skb = skb_clone_and_charge_r(skb, sk); 1461 1462 reason = SKB_DROP_REASON_NOT_SPECIFIED; 1463 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1464 struct dst_entry *dst; 1465 1466 dst = rcu_dereference_protected(sk->sk_rx_dst, 1467 lockdep_sock_is_held(sk)); 1468 1469 sock_rps_save_rxhash(sk, skb); 1470 sk_mark_napi_id(sk, skb); 1471 if (dst) { 1472 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1473 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1474 dst, sk->sk_rx_dst_cookie) == NULL) { 1475 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1476 dst_release(dst); 1477 } 1478 } 1479 1480 tcp_rcv_established(sk, skb); 1481 if (opt_skb) 1482 goto ipv6_pktoptions; 1483 return 0; 1484 } 1485 1486 if (tcp_checksum_complete(skb)) 1487 goto csum_err; 1488 1489 if (sk->sk_state == TCP_LISTEN) { 1490 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1491 1492 if (!nsk) 1493 goto discard; 1494 1495 if (nsk != sk) { 1496 if (tcp_child_process(sk, nsk, skb)) 1497 goto reset; 1498 if (opt_skb) 1499 __kfree_skb(opt_skb); 1500 return 0; 1501 } 1502 } else 1503 sock_rps_save_rxhash(sk, skb); 1504 1505 if (tcp_rcv_state_process(sk, skb)) 1506 goto reset; 1507 if (opt_skb) 1508 goto ipv6_pktoptions; 1509 return 0; 1510 1511 reset: 1512 tcp_v6_send_reset(sk, skb); 1513 discard: 1514 if (opt_skb) 1515 __kfree_skb(opt_skb); 1516 kfree_skb_reason(skb, reason); 1517 return 0; 1518 csum_err: 1519 reason = SKB_DROP_REASON_TCP_CSUM; 1520 trace_tcp_bad_csum(skb); 1521 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1522 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1523 goto discard; 1524 1525 1526 ipv6_pktoptions: 1527 /* Do you ask, what is it? 1528 1529 1. skb was enqueued by tcp. 1530 2. skb is added to tail of read queue, rather than out of order. 1531 3. socket is not in passive state. 1532 4. Finally, it really contains options, which user wants to receive. 
1533 */ 1534 tp = tcp_sk(sk); 1535 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1536 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1537 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1538 np->mcast_oif = tcp_v6_iif(opt_skb); 1539 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1540 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1541 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1542 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1543 if (np->repflow) 1544 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1545 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1546 tcp_v6_restore_cb(opt_skb); 1547 opt_skb = xchg(&np->pktoptions, opt_skb); 1548 } else { 1549 __kfree_skb(opt_skb); 1550 opt_skb = xchg(&np->pktoptions, NULL); 1551 } 1552 } 1553 1554 consume_skb(opt_skb); 1555 return 0; 1556 } 1557 1558 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1559 const struct tcphdr *th) 1560 { 1561 /* This is tricky: we move IP6CB at its correct location into 1562 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1563 * _decode_session6() uses IP6CB(). 1564 * barrier() makes sure compiler won't play aliasing games. 1565 */ 1566 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1567 sizeof(struct inet6_skb_parm)); 1568 barrier(); 1569 1570 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1571 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1572 skb->len - th->doff*4); 1573 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1574 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1575 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1576 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1577 TCP_SKB_CB(skb)->sacked = 0; 1578 TCP_SKB_CB(skb)->has_rxtstamp = 1579 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1580 } 1581 1582 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1583 { 1584 enum skb_drop_reason drop_reason; 1585 int sdif = inet6_sdif(skb); 1586 int dif = inet6_iif(skb); 1587 const struct tcphdr *th; 1588 const struct ipv6hdr *hdr; 1589 bool refcounted; 1590 struct sock *sk; 1591 int ret; 1592 struct net *net = dev_net(skb->dev); 1593 1594 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1595 if (skb->pkt_type != PACKET_HOST) 1596 goto discard_it; 1597 1598 /* 1599 * Count it even if it's bad. 
1600 */ 1601 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1602 1603 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1604 goto discard_it; 1605 1606 th = (const struct tcphdr *)skb->data; 1607 1608 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1609 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1610 goto bad_packet; 1611 } 1612 if (!pskb_may_pull(skb, th->doff*4)) 1613 goto discard_it; 1614 1615 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1616 goto csum_error; 1617 1618 th = (const struct tcphdr *)skb->data; 1619 hdr = ipv6_hdr(skb); 1620 1621 lookup: 1622 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), 1623 th->source, th->dest, inet6_iif(skb), sdif, 1624 &refcounted); 1625 if (!sk) 1626 goto no_tcp_socket; 1627 1628 process: 1629 if (sk->sk_state == TCP_TIME_WAIT) 1630 goto do_time_wait; 1631 1632 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1633 struct request_sock *req = inet_reqsk(sk); 1634 bool req_stolen = false; 1635 struct sock *nsk; 1636 1637 sk = req->rsk_listener; 1638 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1639 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1640 else 1641 drop_reason = tcp_inbound_md5_hash(sk, skb, 1642 &hdr->saddr, &hdr->daddr, 1643 AF_INET6, dif, sdif); 1644 if (drop_reason) { 1645 sk_drops_add(sk, skb); 1646 reqsk_put(req); 1647 goto discard_it; 1648 } 1649 if (tcp_checksum_complete(skb)) { 1650 reqsk_put(req); 1651 goto csum_error; 1652 } 1653 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1654 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1655 if (!nsk) { 1656 inet_csk_reqsk_queue_drop_and_put(sk, req); 1657 goto lookup; 1658 } 1659 sk = nsk; 1660 /* reuseport_migrate_sock() has already held one sk_refcnt 1661 * before returning. 1662 */ 1663 } else { 1664 sock_hold(sk); 1665 } 1666 refcounted = true; 1667 nsk = NULL; 1668 if (!tcp_filter(sk, skb)) { 1669 th = (const struct tcphdr *)skb->data; 1670 hdr = ipv6_hdr(skb); 1671 tcp_v6_fill_cb(skb, hdr, th); 1672 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1673 } else { 1674 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1675 } 1676 if (!nsk) { 1677 reqsk_put(req); 1678 if (req_stolen) { 1679 /* Another cpu got exclusive access to req 1680 * and created a full blown socket. 1681 * Try to feed this packet to this socket 1682 * instead of discarding it. 
1683 */ 1684 tcp_v6_restore_cb(skb); 1685 sock_put(sk); 1686 goto lookup; 1687 } 1688 goto discard_and_relse; 1689 } 1690 nf_reset_ct(skb); 1691 if (nsk == sk) { 1692 reqsk_put(req); 1693 tcp_v6_restore_cb(skb); 1694 } else if (tcp_child_process(sk, nsk, skb)) { 1695 tcp_v6_send_reset(nsk, skb); 1696 goto discard_and_relse; 1697 } else { 1698 sock_put(sk); 1699 return 0; 1700 } 1701 } 1702 1703 if (static_branch_unlikely(&ip6_min_hopcount)) { 1704 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1705 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1706 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1707 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1708 goto discard_and_relse; 1709 } 1710 } 1711 1712 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1713 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1714 goto discard_and_relse; 1715 } 1716 1717 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, 1718 AF_INET6, dif, sdif); 1719 if (drop_reason) 1720 goto discard_and_relse; 1721 1722 nf_reset_ct(skb); 1723 1724 if (tcp_filter(sk, skb)) { 1725 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1726 goto discard_and_relse; 1727 } 1728 th = (const struct tcphdr *)skb->data; 1729 hdr = ipv6_hdr(skb); 1730 tcp_v6_fill_cb(skb, hdr, th); 1731 1732 skb->dev = NULL; 1733 1734 if (sk->sk_state == TCP_LISTEN) { 1735 ret = tcp_v6_do_rcv(sk, skb); 1736 goto put_and_return; 1737 } 1738 1739 sk_incoming_cpu_update(sk); 1740 1741 bh_lock_sock_nested(sk); 1742 tcp_segs_in(tcp_sk(sk), skb); 1743 ret = 0; 1744 if (!sock_owned_by_user(sk)) { 1745 ret = tcp_v6_do_rcv(sk, skb); 1746 } else { 1747 if (tcp_add_backlog(sk, skb, &drop_reason)) 1748 goto discard_and_relse; 1749 } 1750 bh_unlock_sock(sk); 1751 put_and_return: 1752 if (refcounted) 1753 sock_put(sk); 1754 return ret ? 
-1 : 0; 1755 1756 no_tcp_socket: 1757 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1758 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1759 goto discard_it; 1760 1761 tcp_v6_fill_cb(skb, hdr, th); 1762 1763 if (tcp_checksum_complete(skb)) { 1764 csum_error: 1765 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1766 trace_tcp_bad_csum(skb); 1767 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1768 bad_packet: 1769 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1770 } else { 1771 tcp_v6_send_reset(NULL, skb); 1772 } 1773 1774 discard_it: 1775 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1776 kfree_skb_reason(skb, drop_reason); 1777 return 0; 1778 1779 discard_and_relse: 1780 sk_drops_add(sk, skb); 1781 if (refcounted) 1782 sock_put(sk); 1783 goto discard_it; 1784 1785 do_time_wait: 1786 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1787 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1788 inet_twsk_put(inet_twsk(sk)); 1789 goto discard_it; 1790 } 1791 1792 tcp_v6_fill_cb(skb, hdr, th); 1793 1794 if (tcp_checksum_complete(skb)) { 1795 inet_twsk_put(inet_twsk(sk)); 1796 goto csum_error; 1797 } 1798 1799 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1800 case TCP_TW_SYN: 1801 { 1802 struct sock *sk2; 1803 1804 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 1805 skb, __tcp_hdrlen(th), 1806 &ipv6_hdr(skb)->saddr, th->source, 1807 &ipv6_hdr(skb)->daddr, 1808 ntohs(th->dest), 1809 tcp_v6_iif_l3_slave(skb), 1810 sdif); 1811 if (sk2) { 1812 struct inet_timewait_sock *tw = inet_twsk(sk); 1813 inet_twsk_deschedule_put(tw); 1814 sk = sk2; 1815 tcp_v6_restore_cb(skb); 1816 refcounted = false; 1817 goto process; 1818 } 1819 } 1820 /* to ACK */ 1821 fallthrough; 1822 case TCP_TW_ACK: 1823 tcp_v6_timewait_ack(sk, skb); 1824 break; 1825 case TCP_TW_RST: 1826 tcp_v6_send_reset(sk, skb); 1827 inet_twsk_deschedule_put(inet_twsk(sk)); 1828 goto discard_it; 1829 case TCP_TW_SUCCESS: 1830 ; 1831 } 1832 goto discard_it; 1833 } 1834 1835 void tcp_v6_early_demux(struct sk_buff *skb) 1836 { 1837 struct net *net = dev_net(skb->dev); 1838 const struct ipv6hdr *hdr; 1839 const struct tcphdr *th; 1840 struct sock *sk; 1841 1842 if (skb->pkt_type != PACKET_HOST) 1843 return; 1844 1845 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1846 return; 1847 1848 hdr = ipv6_hdr(skb); 1849 th = tcp_hdr(skb); 1850 1851 if (th->doff < sizeof(struct tcphdr) / 4) 1852 return; 1853 1854 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1855 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, 1856 &hdr->saddr, th->source, 1857 &hdr->daddr, ntohs(th->dest), 1858 inet6_iif(skb), inet6_sdif(skb)); 1859 if (sk) { 1860 skb->sk = sk; 1861 skb->destructor = sock_edemux; 1862 if (sk_fullsock(sk)) { 1863 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 1864 1865 if (dst) 1866 dst = dst_check(dst, sk->sk_rx_dst_cookie); 1867 if (dst && 1868 sk->sk_rx_dst_ifindex == skb->skb_iif) 1869 skb_dst_set_noref(skb, dst); 1870 } 1871 } 1872 } 1873 1874 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1875 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1876 .twsk_unique = tcp_twsk_unique, 1877 .twsk_destructor = tcp_twsk_destructor, 1878 }; 1879 1880 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1881 { 1882 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); 1883 } 1884 1885 const struct inet_connection_sock_af_ops ipv6_specific = { 1886 .queue_xmit = inet6_csk_xmit, 1887 .send_check = tcp_v6_send_check, 1888 .rebuild_header 
= inet6_sk_rebuild_header, 1889 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1890 .conn_request = tcp_v6_conn_request, 1891 .syn_recv_sock = tcp_v6_syn_recv_sock, 1892 .net_header_len = sizeof(struct ipv6hdr), 1893 .net_frag_header_len = sizeof(struct frag_hdr), 1894 .setsockopt = ipv6_setsockopt, 1895 .getsockopt = ipv6_getsockopt, 1896 .addr2sockaddr = inet6_csk_addr2sockaddr, 1897 .sockaddr_len = sizeof(struct sockaddr_in6), 1898 .mtu_reduced = tcp_v6_mtu_reduced, 1899 }; 1900 1901 #ifdef CONFIG_TCP_MD5SIG 1902 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1903 .md5_lookup = tcp_v6_md5_lookup, 1904 .calc_md5_hash = tcp_v6_md5_hash_skb, 1905 .md5_parse = tcp_v6_parse_md5_keys, 1906 }; 1907 #endif 1908 1909 /* 1910 * TCP over IPv4 via INET6 API 1911 */ 1912 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1913 .queue_xmit = ip_queue_xmit, 1914 .send_check = tcp_v4_send_check, 1915 .rebuild_header = inet_sk_rebuild_header, 1916 .sk_rx_dst_set = inet_sk_rx_dst_set, 1917 .conn_request = tcp_v6_conn_request, 1918 .syn_recv_sock = tcp_v6_syn_recv_sock, 1919 .net_header_len = sizeof(struct iphdr), 1920 .setsockopt = ipv6_setsockopt, 1921 .getsockopt = ipv6_getsockopt, 1922 .addr2sockaddr = inet6_csk_addr2sockaddr, 1923 .sockaddr_len = sizeof(struct sockaddr_in6), 1924 .mtu_reduced = tcp_v4_mtu_reduced, 1925 }; 1926 1927 #ifdef CONFIG_TCP_MD5SIG 1928 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1929 .md5_lookup = tcp_v4_md5_lookup, 1930 .calc_md5_hash = tcp_v4_md5_hash_skb, 1931 .md5_parse = tcp_v6_parse_md5_keys, 1932 }; 1933 #endif 1934 1935 /* NOTE: A lot of things set to zero explicitly by call to 1936 * sk_alloc() so need not be done here. 1937 */ 1938 static int tcp_v6_init_sock(struct sock *sk) 1939 { 1940 struct inet_connection_sock *icsk = inet_csk(sk); 1941 1942 tcp_init_sock(sk); 1943 1944 icsk->icsk_af_ops = &ipv6_specific; 1945 1946 #ifdef CONFIG_TCP_MD5SIG 1947 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1948 #endif 1949 1950 return 0; 1951 } 1952 1953 #ifdef CONFIG_PROC_FS 1954 /* Proc filesystem TCPv6 sock list dumping. */ 1955 static void get_openreq6(struct seq_file *seq, 1956 const struct request_sock *req, int i) 1957 { 1958 long ttd = req->rsk_timer.expires - jiffies; 1959 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1960 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1961 1962 if (ttd < 0) 1963 ttd = 0; 1964 1965 seq_printf(seq, 1966 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1967 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1968 i, 1969 src->s6_addr32[0], src->s6_addr32[1], 1970 src->s6_addr32[2], src->s6_addr32[3], 1971 inet_rsk(req)->ir_num, 1972 dest->s6_addr32[0], dest->s6_addr32[1], 1973 dest->s6_addr32[2], dest->s6_addr32[3], 1974 ntohs(inet_rsk(req)->ir_rmt_port), 1975 TCP_SYN_RECV, 1976 0, 0, /* could print option size, but that is af dependent. 
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
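/* Dump one TIME_WAIT socket; the queue, uid and inode columns are printed
 * as zero because a timewait socket keeps no such state.
 */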
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest  = &tw->tw_v6_daddr;
	src   = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
				  sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
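/* Registration glue: tcpv6_protocol hooks tcp_v6_rcv()/tcp_v6_err() into the
 * IPv6 input path for IPPROTO_TCP, and tcpv6_protosw exposes SOCK_STREAM
 * sockets backed by tcpv6_prot through the inet6 socket layer.
 */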
static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	= tcpv6_net_init,
	.exit	= tcpv6_net_exit,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}