// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
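 *
 * Note: the constant offset used below relies on struct ipv6_pinfo being
 * the trailing member of struct tcp6_sock, so the pointer can be derived
 * from the socket address itself instead of loading inet_sk(sk)->pinet6.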
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
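	/* Common failure path: clear the destination port and cached route
	 * capabilities so the socket is left in a consistent state for a
	 * later connect() attempt.
	 */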
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
462 */ 463 if (fastopen && !fastopen->sk) 464 break; 465 466 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 467 468 if (!sock_owned_by_user(sk)) { 469 sk->sk_err = err; 470 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ 471 472 tcp_done(sk); 473 } else 474 sk->sk_err_soft = err; 475 goto out; 476 case TCP_LISTEN: 477 break; 478 default: 479 /* check if this ICMP message allows revert of backoff. 480 * (see RFC 6069) 481 */ 482 if (!fastopen && type == ICMPV6_DEST_UNREACH && 483 code == ICMPV6_NOROUTE) 484 tcp_ld_RTO_revert(sk, seq); 485 } 486 487 if (!sock_owned_by_user(sk) && np->recverr) { 488 sk->sk_err = err; 489 sk->sk_error_report(sk); 490 } else 491 sk->sk_err_soft = err; 492 493 out: 494 bh_unlock_sock(sk); 495 sock_put(sk); 496 return 0; 497 } 498 499 500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 501 struct flowi *fl, 502 struct request_sock *req, 503 struct tcp_fastopen_cookie *foc, 504 enum tcp_synack_type synack_type, 505 struct sk_buff *syn_skb) 506 { 507 struct inet_request_sock *ireq = inet_rsk(req); 508 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 509 struct ipv6_txoptions *opt; 510 struct flowi6 *fl6 = &fl->u.ip6; 511 struct sk_buff *skb; 512 int err = -ENOMEM; 513 u8 tclass; 514 515 /* First, grab a route. */ 516 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 517 IPPROTO_TCP)) == NULL) 518 goto done; 519 520 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 521 522 if (skb) { 523 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 524 &ireq->ir_v6_rmt_addr); 525 526 fl6->daddr = ireq->ir_v6_rmt_addr; 527 if (np->repflow && ireq->pktopts) 528 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 529 530 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? 
531 tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : 532 np->tclass; 533 534 if (!INET_ECN_is_capable(tclass) && 535 tcp_bpf_ca_needs_ecn((struct sock *)req)) 536 tclass |= INET_ECN_ECT_0; 537 538 rcu_read_lock(); 539 opt = ireq->ipv6_opt; 540 if (!opt) 541 opt = rcu_dereference(np->opt); 542 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, 543 tclass, sk->sk_priority); 544 rcu_read_unlock(); 545 err = net_xmit_eval(err); 546 } 547 548 done: 549 return err; 550 } 551 552 553 static void tcp_v6_reqsk_destructor(struct request_sock *req) 554 { 555 kfree(inet_rsk(req)->ipv6_opt); 556 kfree_skb(inet_rsk(req)->pktopts); 557 } 558 559 #ifdef CONFIG_TCP_MD5SIG 560 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 561 const struct in6_addr *addr, 562 int l3index) 563 { 564 return tcp_md5_do_lookup(sk, l3index, 565 (union tcp_md5_addr *)addr, AF_INET6); 566 } 567 568 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 569 const struct sock *addr_sk) 570 { 571 int l3index; 572 573 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 574 addr_sk->sk_bound_dev_if); 575 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 576 l3index); 577 } 578 579 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 580 sockptr_t optval, int optlen) 581 { 582 struct tcp_md5sig cmd; 583 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 584 int l3index = 0; 585 u8 prefixlen; 586 587 if (optlen < sizeof(cmd)) 588 return -EINVAL; 589 590 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 591 return -EFAULT; 592 593 if (sin6->sin6_family != AF_INET6) 594 return -EINVAL; 595 596 if (optname == TCP_MD5SIG_EXT && 597 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 598 prefixlen = cmd.tcpm_prefixlen; 599 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 600 prefixlen > 32)) 601 return -EINVAL; 602 } else { 603 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 604 } 605 606 if (optname == TCP_MD5SIG_EXT && 607 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 608 struct net_device *dev; 609 610 rcu_read_lock(); 611 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 612 if (dev && netif_is_l3_master(dev)) 613 l3index = dev->ifindex; 614 rcu_read_unlock(); 615 616 /* ok to reference set/not set outside of rcu; 617 * right now device MUST be an L3 master 618 */ 619 if (!dev || !l3index) 620 return -EINVAL; 621 } 622 623 if (!cmd.tcpm_keylen) { 624 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 625 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 626 AF_INET, prefixlen, 627 l3index); 628 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 629 AF_INET6, prefixlen, l3index); 630 } 631 632 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 633 return -EINVAL; 634 635 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 636 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 637 AF_INET, prefixlen, l3index, 638 cmd.tcpm_key, cmd.tcpm_keylen, 639 GFP_KERNEL); 640 641 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 642 AF_INET6, prefixlen, l3index, 643 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 644 } 645 646 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 647 const struct in6_addr *daddr, 648 const struct in6_addr *saddr, 649 const struct tcphdr *th, int nbytes) 650 { 651 struct tcp6_pseudohdr *bp; 652 struct scatterlist sg; 653 struct tcphdr *_th; 654 655 bp = hp->scratch; 656 /* 1. 
TCP pseudo-header (RFC2460) */ 657 bp->saddr = *saddr; 658 bp->daddr = *daddr; 659 bp->protocol = cpu_to_be32(IPPROTO_TCP); 660 bp->len = cpu_to_be32(nbytes); 661 662 _th = (struct tcphdr *)(bp + 1); 663 memcpy(_th, th, sizeof(*th)); 664 _th->check = 0; 665 666 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 667 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 668 sizeof(*bp) + sizeof(*th)); 669 return crypto_ahash_update(hp->md5_req); 670 } 671 672 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 673 const struct in6_addr *daddr, struct in6_addr *saddr, 674 const struct tcphdr *th) 675 { 676 struct tcp_md5sig_pool *hp; 677 struct ahash_request *req; 678 679 hp = tcp_get_md5sig_pool(); 680 if (!hp) 681 goto clear_hash_noput; 682 req = hp->md5_req; 683 684 if (crypto_ahash_init(req)) 685 goto clear_hash; 686 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 687 goto clear_hash; 688 if (tcp_md5_hash_key(hp, key)) 689 goto clear_hash; 690 ahash_request_set_crypt(req, NULL, md5_hash, 0); 691 if (crypto_ahash_final(req)) 692 goto clear_hash; 693 694 tcp_put_md5sig_pool(); 695 return 0; 696 697 clear_hash: 698 tcp_put_md5sig_pool(); 699 clear_hash_noput: 700 memset(md5_hash, 0, 16); 701 return 1; 702 } 703 704 static int tcp_v6_md5_hash_skb(char *md5_hash, 705 const struct tcp_md5sig_key *key, 706 const struct sock *sk, 707 const struct sk_buff *skb) 708 { 709 const struct in6_addr *saddr, *daddr; 710 struct tcp_md5sig_pool *hp; 711 struct ahash_request *req; 712 const struct tcphdr *th = tcp_hdr(skb); 713 714 if (sk) { /* valid for establish/request sockets */ 715 saddr = &sk->sk_v6_rcv_saddr; 716 daddr = &sk->sk_v6_daddr; 717 } else { 718 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 719 saddr = &ip6h->saddr; 720 daddr = &ip6h->daddr; 721 } 722 723 hp = tcp_get_md5sig_pool(); 724 if (!hp) 725 goto clear_hash_noput; 726 req = hp->md5_req; 727 728 if (crypto_ahash_init(req)) 729 goto clear_hash; 730 731 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 732 goto clear_hash; 733 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 734 goto clear_hash; 735 if (tcp_md5_hash_key(hp, key)) 736 goto clear_hash; 737 ahash_request_set_crypt(req, NULL, md5_hash, 0); 738 if (crypto_ahash_final(req)) 739 goto clear_hash; 740 741 tcp_put_md5sig_pool(); 742 return 0; 743 744 clear_hash: 745 tcp_put_md5sig_pool(); 746 clear_hash_noput: 747 memset(md5_hash, 0, 16); 748 return 1; 749 } 750 751 #endif 752 753 static bool tcp_v6_inbound_md5_hash(const struct sock *sk, 754 const struct sk_buff *skb, 755 int dif, int sdif) 756 { 757 #ifdef CONFIG_TCP_MD5SIG 758 const __u8 *hash_location = NULL; 759 struct tcp_md5sig_key *hash_expected; 760 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 761 const struct tcphdr *th = tcp_hdr(skb); 762 int genhash, l3index; 763 u8 newhash[16]; 764 765 /* sdif set, means packet ingressed via a device 766 * in an L3 domain and dif is set to the l3mdev 767 */ 768 l3index = sdif ? dif : 0; 769 770 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); 771 hash_location = tcp_parse_md5sig_option(th); 772 773 /* We've parsed the options - do we have a hash? 
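 * Four outcomes follow: neither a configured key nor an MD5 option means
 * the segment is accepted; a configured key without the option counts as
 * TCPMD5NOTFOUND and drops; an option without a configured key counts as
 * TCPMD5UNEXPECTED and drops; otherwise the hash is recomputed over the
 * segment and compared against the received one.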
*/ 774 if (!hash_expected && !hash_location) 775 return false; 776 777 if (hash_expected && !hash_location) { 778 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 779 return true; 780 } 781 782 if (!hash_expected && hash_location) { 783 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 784 return true; 785 } 786 787 /* check the signature */ 788 genhash = tcp_v6_md5_hash_skb(newhash, 789 hash_expected, 790 NULL, skb); 791 792 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 793 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); 794 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", 795 genhash ? "failed" : "mismatch", 796 &ip6h->saddr, ntohs(th->source), 797 &ip6h->daddr, ntohs(th->dest), l3index); 798 return true; 799 } 800 #endif 801 return false; 802 } 803 804 static void tcp_v6_init_req(struct request_sock *req, 805 const struct sock *sk_listener, 806 struct sk_buff *skb) 807 { 808 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 809 struct inet_request_sock *ireq = inet_rsk(req); 810 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 811 812 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 813 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 814 815 /* So that link locals have meaning */ 816 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 817 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 818 ireq->ir_iif = tcp_v6_iif(skb); 819 820 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 821 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 822 np->rxopt.bits.rxinfo || 823 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 824 np->rxopt.bits.rxohlim || np->repflow)) { 825 refcount_inc(&skb->users); 826 ireq->pktopts = skb; 827 } 828 } 829 830 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 831 struct flowi *fl, 832 const struct request_sock *req) 833 { 834 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 835 } 836 837 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 838 .family = AF_INET6, 839 .obj_size = sizeof(struct tcp6_request_sock), 840 .rtx_syn_ack = tcp_rtx_synack, 841 .send_ack = tcp_v6_reqsk_send_ack, 842 .destructor = tcp_v6_reqsk_destructor, 843 .send_reset = tcp_v6_send_reset, 844 .syn_ack_timeout = tcp_syn_ack_timeout, 845 }; 846 847 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 848 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 849 sizeof(struct ipv6hdr), 850 #ifdef CONFIG_TCP_MD5SIG 851 .req_md5_lookup = tcp_v6_md5_lookup, 852 .calc_md5_hash = tcp_v6_md5_hash_skb, 853 #endif 854 .init_req = tcp_v6_init_req, 855 #ifdef CONFIG_SYN_COOKIES 856 .cookie_init_seq = cookie_v6_init_sequence, 857 #endif 858 .route_req = tcp_v6_route_req, 859 .init_seq = tcp_v6_init_seq, 860 .init_ts_off = tcp_v6_init_ts_off, 861 .send_synack = tcp_v6_send_synack, 862 }; 863 864 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 865 u32 ack, u32 win, u32 tsval, u32 tsecr, 866 int oif, struct tcp_md5sig_key *key, int rst, 867 u8 tclass, __be32 label, u32 priority) 868 { 869 const struct tcphdr *th = tcp_hdr(skb); 870 struct tcphdr *t1; 871 struct sk_buff *buff; 872 struct flowi6 fl6; 873 struct net *net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 874 struct sock *ctl_sk = net->ipv6.tcp_sk; 875 unsigned int tot_len = sizeof(struct tcphdr); 876 struct dst_entry *dst; 877 __be32 *topt; 878 __u32 mark = 0; 879 880 if (tsecr) 881 tot_len += TCPOLEN_TSTAMP_ALIGNED; 882 #ifdef CONFIG_TCP_MD5SIG 883 if (key) 884 tot_len += TCPOLEN_MD5SIG_ALIGNED; 885 #endif 886 887 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, 888 GFP_ATOMIC); 889 if (!buff) 890 return; 891 892 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); 893 894 t1 = skb_push(buff, tot_len); 895 skb_reset_transport_header(buff); 896 897 /* Swap the send and the receive. */ 898 memset(t1, 0, sizeof(*t1)); 899 t1->dest = th->source; 900 t1->source = th->dest; 901 t1->doff = tot_len / 4; 902 t1->seq = htonl(seq); 903 t1->ack_seq = htonl(ack); 904 t1->ack = !rst || !th->ack; 905 t1->rst = rst; 906 t1->window = htons(win); 907 908 topt = (__be32 *)(t1 + 1); 909 910 if (tsecr) { 911 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 912 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 913 *topt++ = htonl(tsval); 914 *topt++ = htonl(tsecr); 915 } 916 917 #ifdef CONFIG_TCP_MD5SIG 918 if (key) { 919 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 920 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 921 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 922 &ipv6_hdr(skb)->saddr, 923 &ipv6_hdr(skb)->daddr, t1); 924 } 925 #endif 926 927 memset(&fl6, 0, sizeof(fl6)); 928 fl6.daddr = ipv6_hdr(skb)->saddr; 929 fl6.saddr = ipv6_hdr(skb)->daddr; 930 fl6.flowlabel = label; 931 932 buff->ip_summed = CHECKSUM_PARTIAL; 933 buff->csum = 0; 934 935 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 936 937 fl6.flowi6_proto = IPPROTO_TCP; 938 if (rt6_need_strict(&fl6.daddr) && !oif) 939 fl6.flowi6_oif = tcp_v6_iif(skb); 940 else { 941 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 942 oif = skb->skb_iif; 943 944 fl6.flowi6_oif = oif; 945 } 946 947 if (sk) { 948 if (sk->sk_state == TCP_TIME_WAIT) { 949 mark = inet_twsk(sk)->tw_mark; 950 /* autoflowlabel relies on buff->hash */ 951 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 952 PKT_HASH_TYPE_L4); 953 } else { 954 mark = sk->sk_mark; 955 } 956 buff->tstamp = tcp_transmit_time(sk); 957 } 958 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 959 fl6.fl6_dport = t1->dest; 960 fl6.fl6_sport = t1->source; 961 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); 962 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 963 964 /* Pass a socket to ip6_dst_lookup either it is for RST 965 * Underlying function will use this to retrieve the network 966 * namespace 967 */ 968 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); 969 if (!IS_ERR(dst)) { 970 skb_dst_set(buff, dst); 971 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 972 tclass & ~INET_ECN_MASK, priority); 973 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 974 if (rst) 975 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 976 return; 977 } 978 979 kfree_skb(buff); 980 } 981 982 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 983 { 984 const struct tcphdr *th = tcp_hdr(skb); 985 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 986 u32 seq = 0, ack_seq = 0; 987 struct tcp_md5sig_key *key = NULL; 988 #ifdef CONFIG_TCP_MD5SIG 989 const __u8 *hash_location = NULL; 990 unsigned char newhash[16]; 991 int genhash; 992 struct sock *sk1 = NULL; 993 #endif 994 __be32 label = 0; 995 u32 priority = 0; 996 struct net *net; 997 int oif = 0; 998 999 if (th->rst) 1000 return; 1001 1002 /* If sk not NULL, it means we did a successful lookup and incoming 1003 * route had to be correct. prequeue might have dropped our dst. 1004 */ 1005 if (!sk && !ipv6_unicast_destination(skb)) 1006 return; 1007 1008 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 1009 #ifdef CONFIG_TCP_MD5SIG 1010 rcu_read_lock(); 1011 hash_location = tcp_parse_md5sig_option(th); 1012 if (sk && sk_fullsock(sk)) { 1013 int l3index; 1014 1015 /* sdif set, means packet ingressed via a device 1016 * in an L3 domain and inet_iif is set to it. 1017 */ 1018 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1019 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1020 } else if (hash_location) { 1021 int dif = tcp_v6_iif_l3_slave(skb); 1022 int sdif = tcp_v6_sdif(skb); 1023 int l3index; 1024 1025 /* 1026 * active side is lost. Try to find listening socket through 1027 * source port, and then find md5 key through listening socket. 1028 * we are not loose security here: 1029 * Incoming packet is checked with md5 hash with finding key, 1030 * no RST generated if md5 hash doesn't match. 1031 */ 1032 sk1 = inet6_lookup_listener(net, 1033 &tcp_hashinfo, NULL, 0, 1034 &ipv6h->saddr, 1035 th->source, &ipv6h->daddr, 1036 ntohs(th->source), dif, sdif); 1037 if (!sk1) 1038 goto out; 1039 1040 /* sdif set, means packet ingressed via a device 1041 * in an L3 domain and dif is set to it. 1042 */ 1043 l3index = tcp_v6_sdif(skb) ? 
dif : 0; 1044 1045 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1046 if (!key) 1047 goto out; 1048 1049 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1050 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1051 goto out; 1052 } 1053 #endif 1054 1055 if (th->ack) 1056 seq = ntohl(th->ack_seq); 1057 else 1058 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1059 (th->doff << 2); 1060 1061 if (sk) { 1062 oif = sk->sk_bound_dev_if; 1063 if (sk_fullsock(sk)) { 1064 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1065 1066 trace_tcp_send_reset(sk, skb); 1067 if (np->repflow) 1068 label = ip6_flowlabel(ipv6h); 1069 priority = sk->sk_priority; 1070 } 1071 if (sk->sk_state == TCP_TIME_WAIT) { 1072 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1073 priority = inet_twsk(sk)->tw_priority; 1074 } 1075 } else { 1076 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1077 label = ip6_flowlabel(ipv6h); 1078 } 1079 1080 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1081 ipv6_get_dsfield(ipv6h), label, priority); 1082 1083 #ifdef CONFIG_TCP_MD5SIG 1084 out: 1085 rcu_read_unlock(); 1086 #endif 1087 } 1088 1089 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1090 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1091 struct tcp_md5sig_key *key, u8 tclass, 1092 __be32 label, u32 priority) 1093 { 1094 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1095 tclass, label, priority); 1096 } 1097 1098 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1099 { 1100 struct inet_timewait_sock *tw = inet_twsk(sk); 1101 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1102 1103 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1104 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1105 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1106 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1107 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1108 1109 inet_twsk_put(tw); 1110 } 1111 1112 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1113 struct request_sock *req) 1114 { 1115 int l3index; 1116 1117 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1118 1119 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1120 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1121 */ 1122 /* RFC 7323 2.3 1123 * The window field (SEG.WND) of every outgoing segment, with the 1124 * exception of <SYN> segments, MUST be right-shifted by 1125 * Rcv.Wind.Shift bits: 1126 */ 1127 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1128 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1129 tcp_rsk(req)->rcv_nxt, 1130 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1131 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1132 req->ts_recent, sk->sk_bound_dev_if, 1133 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1134 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1135 } 1136 1137 1138 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1139 { 1140 #ifdef CONFIG_SYN_COOKIES 1141 const struct tcphdr *th = tcp_hdr(skb); 1142 1143 if (!th->syn) 1144 sk = cookie_v6_check(sk, skb); 1145 #endif 1146 return sk; 1147 } 1148 1149 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1150 struct tcphdr *th, u32 *cookie) 1151 { 1152 u16 mss = 0; 1153 #ifdef CONFIG_SYN_COOKIES 1154 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1155 &tcp_request_sock_ipv6_ops, sk, th); 1156 if (mss) { 1157 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1158 tcp_synq_overflow(sk); 1159 } 1160 #endif 1161 return mss; 1162 } 1163 1164 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1165 { 1166 if (skb->protocol == htons(ETH_P_IP)) 1167 return tcp_v4_conn_request(sk, skb); 1168 1169 if (!ipv6_unicast_destination(skb)) 1170 goto drop; 1171 1172 return tcp_conn_request(&tcp6_request_sock_ops, 1173 &tcp_request_sock_ipv6_ops, sk, skb); 1174 1175 drop: 1176 tcp_listendrop(sk); 1177 return 0; /* don't send reset */ 1178 } 1179 1180 static void tcp_v6_restore_cb(struct sk_buff *skb) 1181 { 1182 /* We need to move header back to the beginning if xfrm6_policy_check() 1183 * and tcp_v6_fill_cb() are going to be called again. 1184 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 1185 */ 1186 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1187 sizeof(struct inet6_skb_parm)); 1188 } 1189 1190 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1191 struct request_sock *req, 1192 struct dst_entry *dst, 1193 struct request_sock *req_unhash, 1194 bool *own_req) 1195 { 1196 struct inet_request_sock *ireq; 1197 struct ipv6_pinfo *newnp; 1198 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1199 struct ipv6_txoptions *opt; 1200 struct inet_sock *newinet; 1201 bool found_dup_sk = false; 1202 struct tcp_sock *newtp; 1203 struct sock *newsk; 1204 #ifdef CONFIG_TCP_MD5SIG 1205 struct tcp_md5sig_key *key; 1206 int l3index; 1207 #endif 1208 struct flowi6 fl6; 1209 1210 if (skb->protocol == htons(ETH_P_IP)) { 1211 /* 1212 * v6 mapped 1213 */ 1214 1215 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1216 req_unhash, own_req); 1217 1218 if (!newsk) 1219 return NULL; 1220 1221 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1222 1223 newinet = inet_sk(newsk); 1224 newnp = tcp_inet6_sk(newsk); 1225 newtp = tcp_sk(newsk); 1226 1227 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1228 1229 newnp->saddr = newsk->sk_v6_rcv_saddr; 1230 1231 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1232 if (sk_is_mptcp(newsk)) 1233 mptcpv6_handle_mapped(newsk, true); 1234 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1235 #ifdef CONFIG_TCP_MD5SIG 1236 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1237 #endif 1238 1239 newnp->ipv6_mc_list = NULL; 1240 newnp->ipv6_ac_list = NULL; 1241 newnp->ipv6_fl_list = NULL; 1242 newnp->pktoptions = NULL; 1243 newnp->opt = NULL; 1244 newnp->mcast_oif = inet_iif(skb); 1245 newnp->mcast_hops = ip_hdr(skb)->ttl; 1246 newnp->rcv_flowinfo = 0; 1247 if (np->repflow) 1248 newnp->flow_label = 0; 1249 1250 /* 1251 * No need to charge this sock to 
the relevant IPv6 refcnt debug socks count 1252 * here, tcp_create_openreq_child now does this for us, see the comment in 1253 * that function for the gory details. -acme 1254 */ 1255 1256 /* It is tricky place. Until this moment IPv4 tcp 1257 worked with IPv6 icsk.icsk_af_ops. 1258 Sync it now. 1259 */ 1260 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1261 1262 return newsk; 1263 } 1264 1265 ireq = inet_rsk(req); 1266 1267 if (sk_acceptq_is_full(sk)) 1268 goto out_overflow; 1269 1270 if (!dst) { 1271 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1272 if (!dst) 1273 goto out; 1274 } 1275 1276 newsk = tcp_create_openreq_child(sk, req, skb); 1277 if (!newsk) 1278 goto out_nonewsk; 1279 1280 /* 1281 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1282 * count here, tcp_create_openreq_child now does this for us, see the 1283 * comment in that function for the gory details. -acme 1284 */ 1285 1286 newsk->sk_gso_type = SKB_GSO_TCPV6; 1287 ip6_dst_store(newsk, dst, NULL, NULL); 1288 inet6_sk_rx_dst_set(newsk, skb); 1289 1290 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1291 1292 newtp = tcp_sk(newsk); 1293 newinet = inet_sk(newsk); 1294 newnp = tcp_inet6_sk(newsk); 1295 1296 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1297 1298 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1299 newnp->saddr = ireq->ir_v6_loc_addr; 1300 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1301 newsk->sk_bound_dev_if = ireq->ir_iif; 1302 1303 /* Now IPv6 options... 1304 1305 First: no IPv4 options. 1306 */ 1307 newinet->inet_opt = NULL; 1308 newnp->ipv6_mc_list = NULL; 1309 newnp->ipv6_ac_list = NULL; 1310 newnp->ipv6_fl_list = NULL; 1311 1312 /* Clone RX bits */ 1313 newnp->rxopt.all = np->rxopt.all; 1314 1315 newnp->pktoptions = NULL; 1316 newnp->opt = NULL; 1317 newnp->mcast_oif = tcp_v6_iif(skb); 1318 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1319 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1320 if (np->repflow) 1321 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1322 1323 /* Set ToS of the new socket based upon the value of incoming SYN. */ 1324 if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) 1325 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1326 1327 /* Clone native IPv6 options from listening socket (if any) 1328 1329 Yes, keeping reference count would be much more clever, 1330 but we make one more one thing there: reattach optmem 1331 to newsk. 1332 */ 1333 opt = ireq->ipv6_opt; 1334 if (!opt) 1335 opt = rcu_dereference(np->opt); 1336 if (opt) { 1337 opt = ipv6_dup_options(newsk, opt); 1338 RCU_INIT_POINTER(newnp->opt, opt); 1339 } 1340 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1341 if (opt) 1342 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1343 opt->opt_flen; 1344 1345 tcp_ca_openreq_child(newsk, dst); 1346 1347 tcp_sync_mss(newsk, dst_mtu(dst)); 1348 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1349 1350 tcp_initialize_rcv_mss(newsk); 1351 1352 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1353 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1354 1355 #ifdef CONFIG_TCP_MD5SIG 1356 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1357 1358 /* Copy over the MD5 key from the original socket */ 1359 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1360 if (key) { 1361 /* We're using one, so create a matching key 1362 * on the newsk structure. If we fail to get 1363 * memory, then we end up not copying the key 1364 * across. Shucks. 
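 * The key is installed for the peer address with a /128 prefix, using the
 * l3index derived from ireq->ir_iif above and an atomic allocation, since
 * this runs from the receive path.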
1365 */ 1366 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, 1367 AF_INET6, 128, l3index, key->key, key->keylen, 1368 sk_gfp_mask(sk, GFP_ATOMIC)); 1369 } 1370 #endif 1371 1372 if (__inet_inherit_port(sk, newsk) < 0) { 1373 inet_csk_prepare_forced_close(newsk); 1374 tcp_done(newsk); 1375 goto out; 1376 } 1377 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1378 &found_dup_sk); 1379 if (*own_req) { 1380 tcp_move_syn(newtp, req); 1381 1382 /* Clone pktoptions received with SYN, if we own the req */ 1383 if (ireq->pktopts) { 1384 newnp->pktoptions = skb_clone(ireq->pktopts, 1385 sk_gfp_mask(sk, GFP_ATOMIC)); 1386 consume_skb(ireq->pktopts); 1387 ireq->pktopts = NULL; 1388 if (newnp->pktoptions) { 1389 tcp_v6_restore_cb(newnp->pktoptions); 1390 skb_set_owner_r(newnp->pktoptions, newsk); 1391 } 1392 } 1393 } else { 1394 if (!req_unhash && found_dup_sk) { 1395 /* This code path should only be executed in the 1396 * syncookie case only 1397 */ 1398 bh_unlock_sock(newsk); 1399 sock_put(newsk); 1400 newsk = NULL; 1401 } 1402 } 1403 1404 return newsk; 1405 1406 out_overflow: 1407 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1408 out_nonewsk: 1409 dst_release(dst); 1410 out: 1411 tcp_listendrop(sk); 1412 return NULL; 1413 } 1414 1415 /* The socket must have it's spinlock held when we get 1416 * here, unless it is a TCP_LISTEN socket. 1417 * 1418 * We have a potential double-lock case here, so even when 1419 * doing backlog processing we use the BH locking scheme. 1420 * This is because we cannot sleep with the original spinlock 1421 * held. 1422 */ 1423 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1424 { 1425 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1426 struct sk_buff *opt_skb = NULL; 1427 struct tcp_sock *tp; 1428 1429 /* Imagine: socket is IPv6. IPv4 packet arrives, 1430 goes to IPv4 receive handler and backlogged. 1431 From backlog it always goes here. Kerboom... 1432 Fortunately, tcp_rcv_established and rcv_established 1433 handle them correctly, but it is not case with 1434 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1435 */ 1436 1437 if (skb->protocol == htons(ETH_P_IP)) 1438 return tcp_v4_do_rcv(sk, skb); 1439 1440 /* 1441 * socket locking is here for SMP purposes as backlog rcv 1442 * is currently called with bh processing disabled. 1443 */ 1444 1445 /* Do Stevens' IPV6_PKTOPTIONS. 1446 1447 Yes, guys, it is the only place in our code, where we 1448 may make it not affecting IPv4. 1449 The rest of code is protocol independent, 1450 and I do not like idea to uglify IPv4. 1451 1452 Actually, all the idea behind IPV6_PKTOPTIONS 1453 looks not very well thought. For now we latch 1454 options, received in the last packet, enqueued 1455 by tcp. Feel free to propose better solution. 
1456 --ANK (980728) 1457 */ 1458 if (np->rxopt.all) 1459 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1460 1461 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1462 struct dst_entry *dst = sk->sk_rx_dst; 1463 1464 sock_rps_save_rxhash(sk, skb); 1465 sk_mark_napi_id(sk, skb); 1466 if (dst) { 1467 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 1468 dst->ops->check(dst, np->rx_dst_cookie) == NULL) { 1469 dst_release(dst); 1470 sk->sk_rx_dst = NULL; 1471 } 1472 } 1473 1474 tcp_rcv_established(sk, skb); 1475 if (opt_skb) 1476 goto ipv6_pktoptions; 1477 return 0; 1478 } 1479 1480 if (tcp_checksum_complete(skb)) 1481 goto csum_err; 1482 1483 if (sk->sk_state == TCP_LISTEN) { 1484 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1485 1486 if (!nsk) 1487 goto discard; 1488 1489 if (nsk != sk) { 1490 if (tcp_child_process(sk, nsk, skb)) 1491 goto reset; 1492 if (opt_skb) 1493 __kfree_skb(opt_skb); 1494 return 0; 1495 } 1496 } else 1497 sock_rps_save_rxhash(sk, skb); 1498 1499 if (tcp_rcv_state_process(sk, skb)) 1500 goto reset; 1501 if (opt_skb) 1502 goto ipv6_pktoptions; 1503 return 0; 1504 1505 reset: 1506 tcp_v6_send_reset(sk, skb); 1507 discard: 1508 if (opt_skb) 1509 __kfree_skb(opt_skb); 1510 kfree_skb(skb); 1511 return 0; 1512 csum_err: 1513 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1514 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1515 goto discard; 1516 1517 1518 ipv6_pktoptions: 1519 /* Do you ask, what is it? 1520 1521 1. skb was enqueued by tcp. 1522 2. skb is added to tail of read queue, rather than out of order. 1523 3. socket is not in passive state. 1524 4. Finally, it really contains options, which user wants to receive. 1525 */ 1526 tp = tcp_sk(sk); 1527 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1528 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1529 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1530 np->mcast_oif = tcp_v6_iif(opt_skb); 1531 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1532 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1533 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1534 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1535 if (np->repflow) 1536 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1537 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1538 skb_set_owner_r(opt_skb, sk); 1539 tcp_v6_restore_cb(opt_skb); 1540 opt_skb = xchg(&np->pktoptions, opt_skb); 1541 } else { 1542 __kfree_skb(opt_skb); 1543 opt_skb = xchg(&np->pktoptions, NULL); 1544 } 1545 } 1546 1547 kfree_skb(opt_skb); 1548 return 0; 1549 } 1550 1551 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1552 const struct tcphdr *th) 1553 { 1554 /* This is tricky: we move IP6CB at its correct location into 1555 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1556 * _decode_session6() uses IP6CB(). 1557 * barrier() makes sure compiler won't play aliasing games. 
1558 */ 1559 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1560 sizeof(struct inet6_skb_parm)); 1561 barrier(); 1562 1563 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1564 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1565 skb->len - th->doff*4); 1566 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1567 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1568 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1569 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1570 TCP_SKB_CB(skb)->sacked = 0; 1571 TCP_SKB_CB(skb)->has_rxtstamp = 1572 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1573 } 1574 1575 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1576 { 1577 struct sk_buff *skb_to_free; 1578 int sdif = inet6_sdif(skb); 1579 int dif = inet6_iif(skb); 1580 const struct tcphdr *th; 1581 const struct ipv6hdr *hdr; 1582 bool refcounted; 1583 struct sock *sk; 1584 int ret; 1585 struct net *net = dev_net(skb->dev); 1586 1587 if (skb->pkt_type != PACKET_HOST) 1588 goto discard_it; 1589 1590 /* 1591 * Count it even if it's bad. 1592 */ 1593 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1594 1595 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1596 goto discard_it; 1597 1598 th = (const struct tcphdr *)skb->data; 1599 1600 if (unlikely(th->doff < sizeof(struct tcphdr)/4)) 1601 goto bad_packet; 1602 if (!pskb_may_pull(skb, th->doff*4)) 1603 goto discard_it; 1604 1605 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1606 goto csum_error; 1607 1608 th = (const struct tcphdr *)skb->data; 1609 hdr = ipv6_hdr(skb); 1610 1611 lookup: 1612 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1613 th->source, th->dest, inet6_iif(skb), sdif, 1614 &refcounted); 1615 if (!sk) 1616 goto no_tcp_socket; 1617 1618 process: 1619 if (sk->sk_state == TCP_TIME_WAIT) 1620 goto do_time_wait; 1621 1622 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1623 struct request_sock *req = inet_reqsk(sk); 1624 bool req_stolen = false; 1625 struct sock *nsk; 1626 1627 sk = req->rsk_listener; 1628 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { 1629 sk_drops_add(sk, skb); 1630 reqsk_put(req); 1631 goto discard_it; 1632 } 1633 if (tcp_checksum_complete(skb)) { 1634 reqsk_put(req); 1635 goto csum_error; 1636 } 1637 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1638 inet_csk_reqsk_queue_drop_and_put(sk, req); 1639 goto lookup; 1640 } 1641 sock_hold(sk); 1642 refcounted = true; 1643 nsk = NULL; 1644 if (!tcp_filter(sk, skb)) { 1645 th = (const struct tcphdr *)skb->data; 1646 hdr = ipv6_hdr(skb); 1647 tcp_v6_fill_cb(skb, hdr, th); 1648 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1649 } 1650 if (!nsk) { 1651 reqsk_put(req); 1652 if (req_stolen) { 1653 /* Another cpu got exclusive access to req 1654 * and created a full blown socket. 1655 * Try to feed this packet to this socket 1656 * instead of discarding it. 
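 * The lookup is therefore restarted from the top (goto lookup) once the
 * IP6CB has been restored, so the packet can be delivered to that socket.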
1657 */ 1658 tcp_v6_restore_cb(skb); 1659 sock_put(sk); 1660 goto lookup; 1661 } 1662 goto discard_and_relse; 1663 } 1664 if (nsk == sk) { 1665 reqsk_put(req); 1666 tcp_v6_restore_cb(skb); 1667 } else if (tcp_child_process(sk, nsk, skb)) { 1668 tcp_v6_send_reset(nsk, skb); 1669 goto discard_and_relse; 1670 } else { 1671 sock_put(sk); 1672 return 0; 1673 } 1674 } 1675 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { 1676 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1677 goto discard_and_relse; 1678 } 1679 1680 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1681 goto discard_and_relse; 1682 1683 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) 1684 goto discard_and_relse; 1685 1686 if (tcp_filter(sk, skb)) 1687 goto discard_and_relse; 1688 th = (const struct tcphdr *)skb->data; 1689 hdr = ipv6_hdr(skb); 1690 tcp_v6_fill_cb(skb, hdr, th); 1691 1692 skb->dev = NULL; 1693 1694 if (sk->sk_state == TCP_LISTEN) { 1695 ret = tcp_v6_do_rcv(sk, skb); 1696 goto put_and_return; 1697 } 1698 1699 sk_incoming_cpu_update(sk); 1700 1701 bh_lock_sock_nested(sk); 1702 tcp_segs_in(tcp_sk(sk), skb); 1703 ret = 0; 1704 if (!sock_owned_by_user(sk)) { 1705 skb_to_free = sk->sk_rx_skb_cache; 1706 sk->sk_rx_skb_cache = NULL; 1707 ret = tcp_v6_do_rcv(sk, skb); 1708 } else { 1709 if (tcp_add_backlog(sk, skb)) 1710 goto discard_and_relse; 1711 skb_to_free = NULL; 1712 } 1713 bh_unlock_sock(sk); 1714 if (skb_to_free) 1715 __kfree_skb(skb_to_free); 1716 put_and_return: 1717 if (refcounted) 1718 sock_put(sk); 1719 return ret ? -1 : 0; 1720 1721 no_tcp_socket: 1722 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1723 goto discard_it; 1724 1725 tcp_v6_fill_cb(skb, hdr, th); 1726 1727 if (tcp_checksum_complete(skb)) { 1728 csum_error: 1729 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1730 bad_packet: 1731 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1732 } else { 1733 tcp_v6_send_reset(NULL, skb); 1734 } 1735 1736 discard_it: 1737 kfree_skb(skb); 1738 return 0; 1739 1740 discard_and_relse: 1741 sk_drops_add(sk, skb); 1742 if (refcounted) 1743 sock_put(sk); 1744 goto discard_it; 1745 1746 do_time_wait: 1747 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1748 inet_twsk_put(inet_twsk(sk)); 1749 goto discard_it; 1750 } 1751 1752 tcp_v6_fill_cb(skb, hdr, th); 1753 1754 if (tcp_checksum_complete(skb)) { 1755 inet_twsk_put(inet_twsk(sk)); 1756 goto csum_error; 1757 } 1758 1759 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1760 case TCP_TW_SYN: 1761 { 1762 struct sock *sk2; 1763 1764 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1765 skb, __tcp_hdrlen(th), 1766 &ipv6_hdr(skb)->saddr, th->source, 1767 &ipv6_hdr(skb)->daddr, 1768 ntohs(th->dest), 1769 tcp_v6_iif_l3_slave(skb), 1770 sdif); 1771 if (sk2) { 1772 struct inet_timewait_sock *tw = inet_twsk(sk); 1773 inet_twsk_deschedule_put(tw); 1774 sk = sk2; 1775 tcp_v6_restore_cb(skb); 1776 refcounted = false; 1777 goto process; 1778 } 1779 } 1780 /* to ACK */ 1781 fallthrough; 1782 case TCP_TW_ACK: 1783 tcp_v6_timewait_ack(sk, skb); 1784 break; 1785 case TCP_TW_RST: 1786 tcp_v6_send_reset(sk, skb); 1787 inet_twsk_deschedule_put(inet_twsk(sk)); 1788 goto discard_it; 1789 case TCP_TW_SUCCESS: 1790 ; 1791 } 1792 goto discard_it; 1793 } 1794 1795 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) 1796 { 1797 const struct ipv6hdr *hdr; 1798 const struct tcphdr *th; 1799 struct sock *sk; 1800 1801 if (skb->pkt_type != PACKET_HOST) 1802 return; 1803 1804 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 
1805 return; 1806 1807 hdr = ipv6_hdr(skb); 1808 th = tcp_hdr(skb); 1809 1810 if (th->doff < sizeof(struct tcphdr) / 4) 1811 return; 1812 1813 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1814 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1815 &hdr->saddr, th->source, 1816 &hdr->daddr, ntohs(th->dest), 1817 inet6_iif(skb), inet6_sdif(skb)); 1818 if (sk) { 1819 skb->sk = sk; 1820 skb->destructor = sock_edemux; 1821 if (sk_fullsock(sk)) { 1822 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1823 1824 if (dst) 1825 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); 1826 if (dst && 1827 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 1828 skb_dst_set_noref(skb, dst); 1829 } 1830 } 1831 } 1832 1833 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1834 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1835 .twsk_unique = tcp_twsk_unique, 1836 .twsk_destructor = tcp_twsk_destructor, 1837 }; 1838 1839 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1840 { 1841 struct ipv6_pinfo *np = inet6_sk(sk); 1842 1843 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); 1844 } 1845 1846 const struct inet_connection_sock_af_ops ipv6_specific = { 1847 .queue_xmit = inet6_csk_xmit, 1848 .send_check = tcp_v6_send_check, 1849 .rebuild_header = inet6_sk_rebuild_header, 1850 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1851 .conn_request = tcp_v6_conn_request, 1852 .syn_recv_sock = tcp_v6_syn_recv_sock, 1853 .net_header_len = sizeof(struct ipv6hdr), 1854 .net_frag_header_len = sizeof(struct frag_hdr), 1855 .setsockopt = ipv6_setsockopt, 1856 .getsockopt = ipv6_getsockopt, 1857 .addr2sockaddr = inet6_csk_addr2sockaddr, 1858 .sockaddr_len = sizeof(struct sockaddr_in6), 1859 .mtu_reduced = tcp_v6_mtu_reduced, 1860 }; 1861 1862 #ifdef CONFIG_TCP_MD5SIG 1863 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1864 .md5_lookup = tcp_v6_md5_lookup, 1865 .calc_md5_hash = tcp_v6_md5_hash_skb, 1866 .md5_parse = tcp_v6_parse_md5_keys, 1867 }; 1868 #endif 1869 1870 /* 1871 * TCP over IPv4 via INET6 API 1872 */ 1873 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1874 .queue_xmit = ip_queue_xmit, 1875 .send_check = tcp_v4_send_check, 1876 .rebuild_header = inet_sk_rebuild_header, 1877 .sk_rx_dst_set = inet_sk_rx_dst_set, 1878 .conn_request = tcp_v6_conn_request, 1879 .syn_recv_sock = tcp_v6_syn_recv_sock, 1880 .net_header_len = sizeof(struct iphdr), 1881 .setsockopt = ipv6_setsockopt, 1882 .getsockopt = ipv6_getsockopt, 1883 .addr2sockaddr = inet6_csk_addr2sockaddr, 1884 .sockaddr_len = sizeof(struct sockaddr_in6), 1885 .mtu_reduced = tcp_v4_mtu_reduced, 1886 }; 1887 1888 #ifdef CONFIG_TCP_MD5SIG 1889 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1890 .md5_lookup = tcp_v4_md5_lookup, 1891 .calc_md5_hash = tcp_v4_md5_hash_skb, 1892 .md5_parse = tcp_v6_parse_md5_keys, 1893 }; 1894 #endif 1895 1896 /* NOTE: A lot of things set to zero explicitly by call to 1897 * sk_alloc() so need not be done here. 
1898 */ 1899 static int tcp_v6_init_sock(struct sock *sk) 1900 { 1901 struct inet_connection_sock *icsk = inet_csk(sk); 1902 1903 tcp_init_sock(sk); 1904 1905 icsk->icsk_af_ops = &ipv6_specific; 1906 1907 #ifdef CONFIG_TCP_MD5SIG 1908 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1909 #endif 1910 1911 return 0; 1912 } 1913 1914 static void tcp_v6_destroy_sock(struct sock *sk) 1915 { 1916 tcp_v4_destroy_sock(sk); 1917 inet6_destroy_sock(sk); 1918 } 1919 1920 #ifdef CONFIG_PROC_FS 1921 /* Proc filesystem TCPv6 sock list dumping. */ 1922 static void get_openreq6(struct seq_file *seq, 1923 const struct request_sock *req, int i) 1924 { 1925 long ttd = req->rsk_timer.expires - jiffies; 1926 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1927 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1928 1929 if (ttd < 0) 1930 ttd = 0; 1931 1932 seq_printf(seq, 1933 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1934 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1935 i, 1936 src->s6_addr32[0], src->s6_addr32[1], 1937 src->s6_addr32[2], src->s6_addr32[3], 1938 inet_rsk(req)->ir_num, 1939 dest->s6_addr32[0], dest->s6_addr32[1], 1940 dest->s6_addr32[2], dest->s6_addr32[3], 1941 ntohs(inet_rsk(req)->ir_rmt_port), 1942 TCP_SYN_RECV, 1943 0, 0, /* could print option size, but that is af dependent. */ 1944 1, /* timers active (only the expire timer) */ 1945 jiffies_to_clock_t(ttd), 1946 req->num_timeout, 1947 from_kuid_munged(seq_user_ns(seq), 1948 sock_i_uid(req->rsk_listener)), 1949 0, /* non standard timer */ 1950 0, /* open_requests have no inode */ 1951 0, req); 1952 } 1953 1954 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 1955 { 1956 const struct in6_addr *dest, *src; 1957 __u16 destp, srcp; 1958 int timer_active; 1959 unsigned long timer_expires; 1960 const struct inet_sock *inet = inet_sk(sp); 1961 const struct tcp_sock *tp = tcp_sk(sp); 1962 const struct inet_connection_sock *icsk = inet_csk(sp); 1963 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1964 int rx_queue; 1965 int state; 1966 1967 dest = &sp->sk_v6_daddr; 1968 src = &sp->sk_v6_rcv_saddr; 1969 destp = ntohs(inet->inet_dport); 1970 srcp = ntohs(inet->inet_sport); 1971 1972 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 1973 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 1974 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 1975 timer_active = 1; 1976 timer_expires = icsk->icsk_timeout; 1977 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 1978 timer_active = 4; 1979 timer_expires = icsk->icsk_timeout; 1980 } else if (timer_pending(&sp->sk_timer)) { 1981 timer_active = 2; 1982 timer_expires = sp->sk_timer.expires; 1983 } else { 1984 timer_active = 0; 1985 timer_expires = jiffies; 1986 } 1987 1988 state = inet_sk_state_load(sp); 1989 if (state == TCP_LISTEN) 1990 rx_queue = READ_ONCE(sp->sk_ack_backlog); 1991 else 1992 /* Because we don't lock the socket, 1993 * we might find a transient negative value. 
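 * hence the clamp to zero in the max_t() below.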
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

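/* What the seq_file above produces when /proc/net/tcp6 is read: the header
 * comes from tcp6_seq_show() and each record from one of the three dumpers.
 * The record below is purely illustrative (made-up values) for a socket
 * listening on port 22 (0x0016), state 0A == TCP_LISTEN:
 *
 *   sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
 *    0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 0000000000000000 100 0 0 10 0
 *
 * Addresses are the four s6_addr32 words printed with %08X, so on
 * little-endian hosts each 32-bit group appears byte-swapped relative to
 * the usual textual IPv6 form.
 */
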
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
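
/* tcpv6_init() and tcpv6_exit() are not called from this file; in the
 * mainline tree the IPv6 core invokes them from inet6_init() in
 * net/ipv6/af_inet6.c during boot, roughly:
 *
 *	err = tcpv6_init();
 *	if (err)
 *		goto tcpv6_fail;
 *
 * Note that the error-unwind labels in tcpv6_init() above tear the
 * registrations down in the reverse of the order they were set up, and
 * tcpv6_exit() mirrors that same reverse order.
 */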