1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/tcp.h> 45 #include <net/ndisc.h> 46 #include <net/inet6_hashtables.h> 47 #include <net/inet6_connection_sock.h> 48 #include <net/ipv6.h> 49 #include <net/transp_v6.h> 50 #include <net/addrconf.h> 51 #include <net/ip6_route.h> 52 #include <net/ip6_checksum.h> 53 #include <net/inet_ecn.h> 54 #include <net/protocol.h> 55 #include <net/xfrm.h> 56 #include <net/snmp.h> 57 #include <net/dsfield.h> 58 #include <net/timewait_sock.h> 59 #include <net/inet_common.h> 60 #include <net/secure_seq.h> 61 #include <net/busy_poll.h> 62 63 #include <linux/proc_fs.h> 64 #include <linux/seq_file.h> 65 66 #include <crypto/hash.h> 67 #include <linux/scatterlist.h> 68 69 #include <trace/events/tcp.h> 70 71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 73 struct request_sock *req); 74 75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 76 77 static const struct inet_connection_sock_af_ops ipv6_mapped; 78 const struct inet_connection_sock_af_ops ipv6_specific; 79 #ifdef CONFIG_TCP_MD5SIG 80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 82 #else 83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 84 const struct in6_addr *addr, 85 int l3index) 86 { 87 return NULL; 88 } 89 #endif 90 91 /* Helper returning the inet6 address from a given tcp socket. 92 * It can be used in TCP stack instead of inet6_sk(sk). 93 * This avoids a dereference and allow compiler optimizations. 94 * It is a specialized version of inet6_sk_generic(). 
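 *
 * The trick relies on struct layout: struct ipv6_pinfo occupies the last
 * sizeof(struct ipv6_pinfo) bytes of struct tcp6_sock, so its address is
 * always the socket pointer plus a compile-time constant offset.  inet6_sk()
 * would instead load the pinet6 pointer stored in the socket, costing an
 * extra dereference on every use.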
95 */ 96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) 97 { 98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); 99 100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset); 101 } 102 103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 104 { 105 struct dst_entry *dst = skb_dst(skb); 106 107 if (dst && dst_hold_safe(dst)) { 108 const struct rt6_info *rt = (const struct rt6_info *)dst; 109 110 sk->sk_rx_dst = dst; 111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); 113 } 114 } 115 116 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 117 { 118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32, 120 tcp_hdr(skb)->dest, 121 tcp_hdr(skb)->source); 122 } 123 124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 125 { 126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 127 ipv6_hdr(skb)->saddr.s6_addr32); 128 } 129 130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 131 int addr_len) 132 { 133 /* This check is replicated from tcp_v6_connect() and intended to 134 * prevent BPF program called below from accessing bytes that are out 135 * of the bound specified by user in addr_len. 136 */ 137 if (addr_len < SIN6_LEN_RFC2133) 138 return -EINVAL; 139 140 sock_owned_by_me(sk); 141 142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); 143 } 144 145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 146 int addr_len) 147 { 148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_connection_sock *icsk = inet_csk(sk); 151 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 152 struct tcp_sock *tp = tcp_sk(sk); 153 struct in6_addr *saddr = NULL, *final_p, final; 154 struct ipv6_txoptions *opt; 155 struct flowi6 fl6; 156 struct dst_entry *dst; 157 int addr_type; 158 int err; 159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 160 161 if (addr_len < SIN6_LEN_RFC2133) 162 return -EINVAL; 163 164 if (usin->sin6_family != AF_INET6) 165 return -EAFNOSUPPORT; 166 167 memset(&fl6, 0, sizeof(fl6)); 168 169 if (np->sndflow) { 170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 171 IP6_ECN_flow_init(fl6.flowlabel); 172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 173 struct ip6_flowlabel *flowlabel; 174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 175 if (IS_ERR(flowlabel)) 176 return -EINVAL; 177 fl6_sock_release(flowlabel); 178 } 179 } 180 181 /* 182 * connect() to INADDR_ANY means loopback (BSD'ism). 183 */ 184 185 if (ipv6_addr_any(&usin->sin6_addr)) { 186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 188 &usin->sin6_addr); 189 else 190 usin->sin6_addr = in6addr_loopback; 191 } 192 193 addr_type = ipv6_addr_type(&usin->sin6_addr); 194 195 if (addr_type & IPV6_ADDR_MULTICAST) 196 return -ENETUNREACH; 197 198 if (addr_type&IPV6_ADDR_LINKLOCAL) { 199 if (addr_len >= sizeof(struct sockaddr_in6) && 200 usin->sin6_scope_id) { 201 /* If interface is set while binding, indices 202 * must coincide. 
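 *
 * Userspace view (illustrative sketch only, not part of this file): a
 * connect() to a link-local peer normally carries the scope explicitly,
 * e.g.
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(443),
 *				  .sin6_scope_id = if_nametoindex("eth0") };
 *	inet_pton(AF_INET6, "fe80::1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * If no scope id is supplied and the socket is not already bound to a
 * device, the checks below fail the connect with -EINVAL.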
203 */ 204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 205 return -EINVAL; 206 207 sk->sk_bound_dev_if = usin->sin6_scope_id; 208 } 209 210 /* Connect to link-local address requires an interface */ 211 if (!sk->sk_bound_dev_if) 212 return -EINVAL; 213 } 214 215 if (tp->rx_opt.ts_recent_stamp && 216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 217 tp->rx_opt.ts_recent = 0; 218 tp->rx_opt.ts_recent_stamp = 0; 219 WRITE_ONCE(tp->write_seq, 0); 220 } 221 222 sk->sk_v6_daddr = usin->sin6_addr; 223 np->flow_label = fl6.flowlabel; 224 225 /* 226 * TCP over IPv4 227 */ 228 229 if (addr_type & IPV6_ADDR_MAPPED) { 230 u32 exthdrlen = icsk->icsk_ext_hdr_len; 231 struct sockaddr_in sin; 232 233 if (__ipv6_only_sock(sk)) 234 return -ENETUNREACH; 235 236 sin.sin_family = AF_INET; 237 sin.sin_port = usin->sin6_port; 238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 239 240 icsk->icsk_af_ops = &ipv6_mapped; 241 if (sk_is_mptcp(sk)) 242 mptcpv6_handle_mapped(sk, true); 243 sk->sk_backlog_rcv = tcp_v4_do_rcv; 244 #ifdef CONFIG_TCP_MD5SIG 245 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 246 #endif 247 248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 249 250 if (err) { 251 icsk->icsk_ext_hdr_len = exthdrlen; 252 icsk->icsk_af_ops = &ipv6_specific; 253 if (sk_is_mptcp(sk)) 254 mptcpv6_handle_mapped(sk, false); 255 sk->sk_backlog_rcv = tcp_v6_do_rcv; 256 #ifdef CONFIG_TCP_MD5SIG 257 tp->af_specific = &tcp_sock_ipv6_specific; 258 #endif 259 goto failure; 260 } 261 np->saddr = sk->sk_v6_rcv_saddr; 262 263 return err; 264 } 265 266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 267 saddr = &sk->sk_v6_rcv_saddr; 268 269 fl6.flowi6_proto = IPPROTO_TCP; 270 fl6.daddr = sk->sk_v6_daddr; 271 fl6.saddr = saddr ? *saddr : np->saddr; 272 fl6.flowi6_oif = sk->sk_bound_dev_if; 273 fl6.flowi6_mark = sk->sk_mark; 274 fl6.fl6_dport = usin->sin6_port; 275 fl6.fl6_sport = inet->inet_sport; 276 fl6.flowi6_uid = sk->sk_uid; 277 278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 279 final_p = fl6_update_dst(&fl6, opt, &final); 280 281 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 282 283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); 284 if (IS_ERR(dst)) { 285 err = PTR_ERR(dst); 286 goto failure; 287 } 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 sk->sk_v6_rcv_saddr = *saddr; 292 } 293 294 /* set the source address */ 295 np->saddr = *saddr; 296 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 297 298 sk->sk_gso_type = SKB_GSO_TCPV6; 299 ip6_dst_store(sk, dst, NULL, NULL); 300 301 icsk->icsk_ext_hdr_len = 0; 302 if (opt) 303 icsk->icsk_ext_hdr_len = opt->opt_flen + 304 opt->opt_nflen; 305 306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 307 308 inet->inet_dport = usin->sin6_port; 309 310 tcp_set_state(sk, TCP_SYN_SENT); 311 err = inet6_hash_connect(tcp_death_row, sk); 312 if (err) 313 goto late_failure; 314 315 sk_set_txhash(sk); 316 317 if (likely(!tp->repair)) { 318 if (!tp->write_seq) 319 WRITE_ONCE(tp->write_seq, 320 secure_tcpv6_seq(np->saddr.s6_addr32, 321 sk->sk_v6_daddr.s6_addr32, 322 inet->inet_sport, 323 inet->inet_dport)); 324 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), 325 np->saddr.s6_addr32, 326 sk->sk_v6_daddr.s6_addr32); 327 } 328 329 if (tcp_fastopen_defer_connect(sk, &err)) 330 return err; 331 if (err) 332 goto late_failure; 333 334 err = tcp_connect(sk); 335 if (err) 336 goto late_failure; 337 338 return 0; 339 340 late_failure: 341 tcp_set_state(sk, TCP_CLOSE); 342 failure: 343 
inet->inet_dport = 0; 344 sk->sk_route_caps = 0; 345 return err; 346 } 347 348 static void tcp_v6_mtu_reduced(struct sock *sk) 349 { 350 struct dst_entry *dst; 351 352 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 353 return; 354 355 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info); 356 if (!dst) 357 return; 358 359 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 360 tcp_sync_mss(sk, dst_mtu(dst)); 361 tcp_simple_retransmit(sk); 362 } 363 } 364 365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 366 u8 type, u8 code, int offset, __be32 info) 367 { 368 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; 369 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 370 struct net *net = dev_net(skb->dev); 371 struct request_sock *fastopen; 372 struct ipv6_pinfo *np; 373 struct tcp_sock *tp; 374 __u32 seq, snd_una; 375 struct sock *sk; 376 bool fatal; 377 int err; 378 379 sk = __inet6_lookup_established(net, &tcp_hashinfo, 380 &hdr->daddr, th->dest, 381 &hdr->saddr, ntohs(th->source), 382 skb->dev->ifindex, inet6_sdif(skb)); 383 384 if (!sk) { 385 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 386 ICMP6_MIB_INERRORS); 387 return -ENOENT; 388 } 389 390 if (sk->sk_state == TCP_TIME_WAIT) { 391 inet_twsk_put(inet_twsk(sk)); 392 return 0; 393 } 394 seq = ntohl(th->seq); 395 fatal = icmpv6_err_convert(type, code, &err); 396 if (sk->sk_state == TCP_NEW_SYN_RECV) { 397 tcp_req_err(sk, seq, fatal); 398 return 0; 399 } 400 401 bh_lock_sock(sk); 402 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 403 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 404 405 if (sk->sk_state == TCP_CLOSE) 406 goto out; 407 408 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { 409 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 410 goto out; 411 } 412 413 tp = tcp_sk(sk); 414 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 415 fastopen = rcu_dereference(tp->fastopen_rsk); 416 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 417 if (sk->sk_state != TCP_LISTEN && 418 !between(seq, snd_una, tp->snd_nxt)) { 419 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 420 goto out; 421 } 422 423 np = tcp_inet6_sk(sk); 424 425 if (type == NDISC_REDIRECT) { 426 if (!sock_owned_by_user(sk)) { 427 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 428 429 if (dst) 430 dst->ops->redirect(dst, sk, skb); 431 } 432 goto out; 433 } 434 435 if (type == ICMPV6_PKT_TOOBIG) { 436 /* We are not interested in TCP_LISTEN and open_requests 437 * (SYN-ACKs send out by Linux are always <576bytes so 438 * they should go through unfragmented). 439 */ 440 if (sk->sk_state == TCP_LISTEN) 441 goto out; 442 443 if (!ip6_sk_accept_pmtu(sk)) 444 goto out; 445 446 tp->mtu_info = ntohl(info); 447 if (!sock_owned_by_user(sk)) 448 tcp_v6_mtu_reduced(sk); 449 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 450 &sk->sk_tsq_flags)) 451 sock_hold(sk); 452 goto out; 453 } 454 455 456 /* Might be for an request_sock */ 457 switch (sk->sk_state) { 458 case TCP_SYN_SENT: 459 case TCP_SYN_RECV: 460 /* Only in fast or simultaneous open. If a fast open socket is 461 * already accepted it is treated as a connected one below. 
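 * A request that has not yet created a child socket is not torn down
 * here; the break below falls through to the generic error handling
 * after the switch.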
462 */ 463 if (fastopen && !fastopen->sk) 464 break; 465 466 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 467 468 if (!sock_owned_by_user(sk)) { 469 sk->sk_err = err; 470 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ 471 472 tcp_done(sk); 473 } else 474 sk->sk_err_soft = err; 475 goto out; 476 case TCP_LISTEN: 477 break; 478 default: 479 /* check if this ICMP message allows revert of backoff. 480 * (see RFC 6069) 481 */ 482 if (!fastopen && type == ICMPV6_DEST_UNREACH && 483 code == ICMPV6_NOROUTE) 484 tcp_ld_RTO_revert(sk, seq); 485 } 486 487 if (!sock_owned_by_user(sk) && np->recverr) { 488 sk->sk_err = err; 489 sk->sk_error_report(sk); 490 } else 491 sk->sk_err_soft = err; 492 493 out: 494 bh_unlock_sock(sk); 495 sock_put(sk); 496 return 0; 497 } 498 499 500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 501 struct flowi *fl, 502 struct request_sock *req, 503 struct tcp_fastopen_cookie *foc, 504 enum tcp_synack_type synack_type, 505 struct sk_buff *syn_skb) 506 { 507 struct inet_request_sock *ireq = inet_rsk(req); 508 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 509 struct ipv6_txoptions *opt; 510 struct flowi6 *fl6 = &fl->u.ip6; 511 struct sk_buff *skb; 512 int err = -ENOMEM; 513 u8 tclass; 514 515 /* First, grab a route. */ 516 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 517 IPPROTO_TCP)) == NULL) 518 goto done; 519 520 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 521 522 if (skb) { 523 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 524 &ireq->ir_v6_rmt_addr); 525 526 fl6->daddr = ireq->ir_v6_rmt_addr; 527 if (np->repflow && ireq->pktopts) 528 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 529 530 rcu_read_lock(); 531 opt = ireq->ipv6_opt; 532 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? 
533 tcp_rsk(req)->syn_tos : np->tclass; 534 if (!opt) 535 opt = rcu_dereference(np->opt); 536 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, 537 tclass & ~INET_ECN_MASK, 538 sk->sk_priority); 539 rcu_read_unlock(); 540 err = net_xmit_eval(err); 541 } 542 543 done: 544 return err; 545 } 546 547 548 static void tcp_v6_reqsk_destructor(struct request_sock *req) 549 { 550 kfree(inet_rsk(req)->ipv6_opt); 551 kfree_skb(inet_rsk(req)->pktopts); 552 } 553 554 #ifdef CONFIG_TCP_MD5SIG 555 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 556 const struct in6_addr *addr, 557 int l3index) 558 { 559 return tcp_md5_do_lookup(sk, l3index, 560 (union tcp_md5_addr *)addr, AF_INET6); 561 } 562 563 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 564 const struct sock *addr_sk) 565 { 566 int l3index; 567 568 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 569 addr_sk->sk_bound_dev_if); 570 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 571 l3index); 572 } 573 574 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 575 sockptr_t optval, int optlen) 576 { 577 struct tcp_md5sig cmd; 578 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 579 int l3index = 0; 580 u8 prefixlen; 581 582 if (optlen < sizeof(cmd)) 583 return -EINVAL; 584 585 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 586 return -EFAULT; 587 588 if (sin6->sin6_family != AF_INET6) 589 return -EINVAL; 590 591 if (optname == TCP_MD5SIG_EXT && 592 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 593 prefixlen = cmd.tcpm_prefixlen; 594 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 595 prefixlen > 32)) 596 return -EINVAL; 597 } else { 598 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 599 } 600 601 if (optname == TCP_MD5SIG_EXT && 602 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 603 struct net_device *dev; 604 605 rcu_read_lock(); 606 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 607 if (dev && netif_is_l3_master(dev)) 608 l3index = dev->ifindex; 609 rcu_read_unlock(); 610 611 /* ok to reference set/not set outside of rcu; 612 * right now device MUST be an L3 master 613 */ 614 if (!dev || !l3index) 615 return -EINVAL; 616 } 617 618 if (!cmd.tcpm_keylen) { 619 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 620 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 621 AF_INET, prefixlen, 622 l3index); 623 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 624 AF_INET6, prefixlen, l3index); 625 } 626 627 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 628 return -EINVAL; 629 630 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 631 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 632 AF_INET, prefixlen, l3index, 633 cmd.tcpm_key, cmd.tcpm_keylen, 634 GFP_KERNEL); 635 636 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 637 AF_INET6, prefixlen, l3index, 638 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 639 } 640 641 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 642 const struct in6_addr *daddr, 643 const struct in6_addr *saddr, 644 const struct tcphdr *th, int nbytes) 645 { 646 struct tcp6_pseudohdr *bp; 647 struct scatterlist sg; 648 struct tcphdr *_th; 649 650 bp = hp->scratch; 651 /* 1. 
TCP pseudo-header (RFC2460) */ 652 bp->saddr = *saddr; 653 bp->daddr = *daddr; 654 bp->protocol = cpu_to_be32(IPPROTO_TCP); 655 bp->len = cpu_to_be32(nbytes); 656 657 _th = (struct tcphdr *)(bp + 1); 658 memcpy(_th, th, sizeof(*th)); 659 _th->check = 0; 660 661 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 662 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 663 sizeof(*bp) + sizeof(*th)); 664 return crypto_ahash_update(hp->md5_req); 665 } 666 667 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 668 const struct in6_addr *daddr, struct in6_addr *saddr, 669 const struct tcphdr *th) 670 { 671 struct tcp_md5sig_pool *hp; 672 struct ahash_request *req; 673 674 hp = tcp_get_md5sig_pool(); 675 if (!hp) 676 goto clear_hash_noput; 677 req = hp->md5_req; 678 679 if (crypto_ahash_init(req)) 680 goto clear_hash; 681 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 682 goto clear_hash; 683 if (tcp_md5_hash_key(hp, key)) 684 goto clear_hash; 685 ahash_request_set_crypt(req, NULL, md5_hash, 0); 686 if (crypto_ahash_final(req)) 687 goto clear_hash; 688 689 tcp_put_md5sig_pool(); 690 return 0; 691 692 clear_hash: 693 tcp_put_md5sig_pool(); 694 clear_hash_noput: 695 memset(md5_hash, 0, 16); 696 return 1; 697 } 698 699 static int tcp_v6_md5_hash_skb(char *md5_hash, 700 const struct tcp_md5sig_key *key, 701 const struct sock *sk, 702 const struct sk_buff *skb) 703 { 704 const struct in6_addr *saddr, *daddr; 705 struct tcp_md5sig_pool *hp; 706 struct ahash_request *req; 707 const struct tcphdr *th = tcp_hdr(skb); 708 709 if (sk) { /* valid for establish/request sockets */ 710 saddr = &sk->sk_v6_rcv_saddr; 711 daddr = &sk->sk_v6_daddr; 712 } else { 713 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 714 saddr = &ip6h->saddr; 715 daddr = &ip6h->daddr; 716 } 717 718 hp = tcp_get_md5sig_pool(); 719 if (!hp) 720 goto clear_hash_noput; 721 req = hp->md5_req; 722 723 if (crypto_ahash_init(req)) 724 goto clear_hash; 725 726 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 727 goto clear_hash; 728 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 729 goto clear_hash; 730 if (tcp_md5_hash_key(hp, key)) 731 goto clear_hash; 732 ahash_request_set_crypt(req, NULL, md5_hash, 0); 733 if (crypto_ahash_final(req)) 734 goto clear_hash; 735 736 tcp_put_md5sig_pool(); 737 return 0; 738 739 clear_hash: 740 tcp_put_md5sig_pool(); 741 clear_hash_noput: 742 memset(md5_hash, 0, 16); 743 return 1; 744 } 745 746 #endif 747 748 static bool tcp_v6_inbound_md5_hash(const struct sock *sk, 749 const struct sk_buff *skb, 750 int dif, int sdif) 751 { 752 #ifdef CONFIG_TCP_MD5SIG 753 const __u8 *hash_location = NULL; 754 struct tcp_md5sig_key *hash_expected; 755 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 756 const struct tcphdr *th = tcp_hdr(skb); 757 int genhash, l3index; 758 u8 newhash[16]; 759 760 /* sdif set, means packet ingressed via a device 761 * in an L3 domain and dif is set to the l3mdev 762 */ 763 l3index = sdif ? dif : 0; 764 765 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); 766 hash_location = tcp_parse_md5sig_option(th); 767 768 /* We've parsed the options - do we have a hash? 
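 *
 * Four outcomes follow: neither a configured key nor an option in the
 * segment means it is accepted; a key without the option, or an option
 * without a key, is counted and dropped; if both are present the hash is
 * recomputed over the pseudo-header, TCP header and payload and compared.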
*/ 769 if (!hash_expected && !hash_location) 770 return false; 771 772 if (hash_expected && !hash_location) { 773 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 774 return true; 775 } 776 777 if (!hash_expected && hash_location) { 778 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 779 return true; 780 } 781 782 /* check the signature */ 783 genhash = tcp_v6_md5_hash_skb(newhash, 784 hash_expected, 785 NULL, skb); 786 787 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 788 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); 789 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", 790 genhash ? "failed" : "mismatch", 791 &ip6h->saddr, ntohs(th->source), 792 &ip6h->daddr, ntohs(th->dest), l3index); 793 return true; 794 } 795 #endif 796 return false; 797 } 798 799 static void tcp_v6_init_req(struct request_sock *req, 800 const struct sock *sk_listener, 801 struct sk_buff *skb) 802 { 803 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 804 struct inet_request_sock *ireq = inet_rsk(req); 805 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 806 807 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 808 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 809 810 /* So that link locals have meaning */ 811 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 812 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 813 ireq->ir_iif = tcp_v6_iif(skb); 814 815 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 816 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 817 np->rxopt.bits.rxinfo || 818 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 819 np->rxopt.bits.rxohlim || np->repflow)) { 820 refcount_inc(&skb->users); 821 ireq->pktopts = skb; 822 } 823 } 824 825 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 826 struct flowi *fl, 827 const struct request_sock *req) 828 { 829 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 830 } 831 832 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 833 .family = AF_INET6, 834 .obj_size = sizeof(struct tcp6_request_sock), 835 .rtx_syn_ack = tcp_rtx_synack, 836 .send_ack = tcp_v6_reqsk_send_ack, 837 .destructor = tcp_v6_reqsk_destructor, 838 .send_reset = tcp_v6_send_reset, 839 .syn_ack_timeout = tcp_syn_ack_timeout, 840 }; 841 842 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 843 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 844 sizeof(struct ipv6hdr), 845 #ifdef CONFIG_TCP_MD5SIG 846 .req_md5_lookup = tcp_v6_md5_lookup, 847 .calc_md5_hash = tcp_v6_md5_hash_skb, 848 #endif 849 .init_req = tcp_v6_init_req, 850 #ifdef CONFIG_SYN_COOKIES 851 .cookie_init_seq = cookie_v6_init_sequence, 852 #endif 853 .route_req = tcp_v6_route_req, 854 .init_seq = tcp_v6_init_seq, 855 .init_ts_off = tcp_v6_init_ts_off, 856 .send_synack = tcp_v6_send_synack, 857 }; 858 859 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 860 u32 ack, u32 win, u32 tsval, u32 tsecr, 861 int oif, struct tcp_md5sig_key *key, int rst, 862 u8 tclass, __be32 label, u32 priority) 863 { 864 const struct tcphdr *th = tcp_hdr(skb); 865 struct tcphdr *t1; 866 struct sk_buff *buff; 867 struct flowi6 fl6; 868 struct net *net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 869 struct sock *ctl_sk = net->ipv6.tcp_sk; 870 unsigned int tot_len = sizeof(struct tcphdr); 871 struct dst_entry *dst; 872 __be32 *topt; 873 __u32 mark = 0; 874 875 if (tsecr) 876 tot_len += TCPOLEN_TSTAMP_ALIGNED; 877 #ifdef CONFIG_TCP_MD5SIG 878 if (key) 879 tot_len += TCPOLEN_MD5SIG_ALIGNED; 880 #endif 881 882 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, 883 GFP_ATOMIC); 884 if (!buff) 885 return; 886 887 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); 888 889 t1 = skb_push(buff, tot_len); 890 skb_reset_transport_header(buff); 891 892 /* Swap the send and the receive. */ 893 memset(t1, 0, sizeof(*t1)); 894 t1->dest = th->source; 895 t1->source = th->dest; 896 t1->doff = tot_len / 4; 897 t1->seq = htonl(seq); 898 t1->ack_seq = htonl(ack); 899 t1->ack = !rst || !th->ack; 900 t1->rst = rst; 901 t1->window = htons(win); 902 903 topt = (__be32 *)(t1 + 1); 904 905 if (tsecr) { 906 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 907 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 908 *topt++ = htonl(tsval); 909 *topt++ = htonl(tsecr); 910 } 911 912 #ifdef CONFIG_TCP_MD5SIG 913 if (key) { 914 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 915 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 916 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 917 &ipv6_hdr(skb)->saddr, 918 &ipv6_hdr(skb)->daddr, t1); 919 } 920 #endif 921 922 memset(&fl6, 0, sizeof(fl6)); 923 fl6.daddr = ipv6_hdr(skb)->saddr; 924 fl6.saddr = ipv6_hdr(skb)->daddr; 925 fl6.flowlabel = label; 926 927 buff->ip_summed = CHECKSUM_PARTIAL; 928 buff->csum = 0; 929 930 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 931 932 fl6.flowi6_proto = IPPROTO_TCP; 933 if (rt6_need_strict(&fl6.daddr) && !oif) 934 fl6.flowi6_oif = tcp_v6_iif(skb); 935 else { 936 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 937 oif = skb->skb_iif; 938 939 fl6.flowi6_oif = oif; 940 } 941 942 if (sk) { 943 if (sk->sk_state == TCP_TIME_WAIT) { 944 mark = inet_twsk(sk)->tw_mark; 945 /* autoflowlabel relies on buff->hash */ 946 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 947 PKT_HASH_TYPE_L4); 948 } else { 949 mark = sk->sk_mark; 950 } 951 buff->tstamp = tcp_transmit_time(sk); 952 } 953 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 954 fl6.fl6_dport = t1->dest; 955 fl6.fl6_sport = t1->source; 956 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
				     sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when this is a RST;
	 * the underlying function uses it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, we did a successful lookup and the incoming
	 * route had to be correct.  prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set means the packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/* The active side is lost.  Try to find the listening socket
		 * through the source port, and then find the MD5 key through
		 * that listening socket.  We do not lose security here:
		 * the incoming packet is verified against the MD5 hash of the
		 * key we find, and no RST is generated if the hash does not
		 * match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set means the packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ?
dif : 0; 1039 1040 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1041 if (!key) 1042 goto out; 1043 1044 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1045 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1046 goto out; 1047 } 1048 #endif 1049 1050 if (th->ack) 1051 seq = ntohl(th->ack_seq); 1052 else 1053 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1054 (th->doff << 2); 1055 1056 if (sk) { 1057 oif = sk->sk_bound_dev_if; 1058 if (sk_fullsock(sk)) { 1059 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1060 1061 trace_tcp_send_reset(sk, skb); 1062 if (np->repflow) 1063 label = ip6_flowlabel(ipv6h); 1064 priority = sk->sk_priority; 1065 } 1066 if (sk->sk_state == TCP_TIME_WAIT) { 1067 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1068 priority = inet_twsk(sk)->tw_priority; 1069 } 1070 } else { 1071 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1072 label = ip6_flowlabel(ipv6h); 1073 } 1074 1075 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1076 ipv6_get_dsfield(ipv6h), label, priority); 1077 1078 #ifdef CONFIG_TCP_MD5SIG 1079 out: 1080 rcu_read_unlock(); 1081 #endif 1082 } 1083 1084 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1085 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1086 struct tcp_md5sig_key *key, u8 tclass, 1087 __be32 label, u32 priority) 1088 { 1089 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1090 tclass, label, priority); 1091 } 1092 1093 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1094 { 1095 struct inet_timewait_sock *tw = inet_twsk(sk); 1096 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1097 1098 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1099 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1100 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1101 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1102 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1103 1104 inet_twsk_put(tw); 1105 } 1106 1107 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1108 struct request_sock *req) 1109 { 1110 int l3index; 1111 1112 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1113 1114 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1115 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1116 */ 1117 /* RFC 7323 2.3 1118 * The window field (SEG.WND) of every outgoing segment, with the 1119 * exception of <SYN> segments, MUST be right-shifted by 1120 * Rcv.Wind.Shift bits: 1121 */ 1122 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
1123 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1124 tcp_rsk(req)->rcv_nxt, 1125 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1126 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1127 req->ts_recent, sk->sk_bound_dev_if, 1128 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1129 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1130 } 1131 1132 1133 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1134 { 1135 #ifdef CONFIG_SYN_COOKIES 1136 const struct tcphdr *th = tcp_hdr(skb); 1137 1138 if (!th->syn) 1139 sk = cookie_v6_check(sk, skb); 1140 #endif 1141 return sk; 1142 } 1143 1144 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1145 struct tcphdr *th, u32 *cookie) 1146 { 1147 u16 mss = 0; 1148 #ifdef CONFIG_SYN_COOKIES 1149 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1150 &tcp_request_sock_ipv6_ops, sk, th); 1151 if (mss) { 1152 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1153 tcp_synq_overflow(sk); 1154 } 1155 #endif 1156 return mss; 1157 } 1158 1159 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1160 { 1161 if (skb->protocol == htons(ETH_P_IP)) 1162 return tcp_v4_conn_request(sk, skb); 1163 1164 if (!ipv6_unicast_destination(skb)) 1165 goto drop; 1166 1167 return tcp_conn_request(&tcp6_request_sock_ops, 1168 &tcp_request_sock_ipv6_ops, sk, skb); 1169 1170 drop: 1171 tcp_listendrop(sk); 1172 return 0; /* don't send reset */ 1173 } 1174 1175 static void tcp_v6_restore_cb(struct sk_buff *skb) 1176 { 1177 /* We need to move header back to the beginning if xfrm6_policy_check() 1178 * and tcp_v6_fill_cb() are going to be called again. 1179 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 1180 */ 1181 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1182 sizeof(struct inet6_skb_parm)); 1183 } 1184 1185 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1186 struct request_sock *req, 1187 struct dst_entry *dst, 1188 struct request_sock *req_unhash, 1189 bool *own_req) 1190 { 1191 struct inet_request_sock *ireq; 1192 struct ipv6_pinfo *newnp; 1193 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1194 struct ipv6_txoptions *opt; 1195 struct inet_sock *newinet; 1196 struct tcp_sock *newtp; 1197 struct sock *newsk; 1198 #ifdef CONFIG_TCP_MD5SIG 1199 struct tcp_md5sig_key *key; 1200 int l3index; 1201 #endif 1202 struct flowi6 fl6; 1203 1204 if (skb->protocol == htons(ETH_P_IP)) { 1205 /* 1206 * v6 mapped 1207 */ 1208 1209 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1210 req_unhash, own_req); 1211 1212 if (!newsk) 1213 return NULL; 1214 1215 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1216 1217 newinet = inet_sk(newsk); 1218 newnp = tcp_inet6_sk(newsk); 1219 newtp = tcp_sk(newsk); 1220 1221 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1222 1223 newnp->saddr = newsk->sk_v6_rcv_saddr; 1224 1225 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1226 if (sk_is_mptcp(newsk)) 1227 mptcpv6_handle_mapped(newsk, true); 1228 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1229 #ifdef CONFIG_TCP_MD5SIG 1230 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1231 #endif 1232 1233 newnp->ipv6_mc_list = NULL; 1234 newnp->ipv6_ac_list = NULL; 1235 newnp->ipv6_fl_list = NULL; 1236 newnp->pktoptions = NULL; 1237 newnp->opt = NULL; 1238 newnp->mcast_oif = inet_iif(skb); 1239 newnp->mcast_hops = ip_hdr(skb)->ttl; 1240 newnp->rcv_flowinfo = 0; 1241 if (np->repflow) 1242 newnp->flow_label = 0; 1243 1244 /* 1245 * No need to charge this sock to the relevant IPv6 refcnt debug 
socks count 1246 * here, tcp_create_openreq_child now does this for us, see the comment in 1247 * that function for the gory details. -acme 1248 */ 1249 1250 /* It is tricky place. Until this moment IPv4 tcp 1251 worked with IPv6 icsk.icsk_af_ops. 1252 Sync it now. 1253 */ 1254 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1255 1256 return newsk; 1257 } 1258 1259 ireq = inet_rsk(req); 1260 1261 if (sk_acceptq_is_full(sk)) 1262 goto out_overflow; 1263 1264 if (!dst) { 1265 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1266 if (!dst) 1267 goto out; 1268 } 1269 1270 newsk = tcp_create_openreq_child(sk, req, skb); 1271 if (!newsk) 1272 goto out_nonewsk; 1273 1274 /* 1275 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1276 * count here, tcp_create_openreq_child now does this for us, see the 1277 * comment in that function for the gory details. -acme 1278 */ 1279 1280 newsk->sk_gso_type = SKB_GSO_TCPV6; 1281 ip6_dst_store(newsk, dst, NULL, NULL); 1282 inet6_sk_rx_dst_set(newsk, skb); 1283 1284 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1285 1286 newtp = tcp_sk(newsk); 1287 newinet = inet_sk(newsk); 1288 newnp = tcp_inet6_sk(newsk); 1289 1290 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1291 1292 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1293 newnp->saddr = ireq->ir_v6_loc_addr; 1294 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1295 newsk->sk_bound_dev_if = ireq->ir_iif; 1296 1297 /* Now IPv6 options... 1298 1299 First: no IPv4 options. 1300 */ 1301 newinet->inet_opt = NULL; 1302 newnp->ipv6_mc_list = NULL; 1303 newnp->ipv6_ac_list = NULL; 1304 newnp->ipv6_fl_list = NULL; 1305 1306 /* Clone RX bits */ 1307 newnp->rxopt.all = np->rxopt.all; 1308 1309 newnp->pktoptions = NULL; 1310 newnp->opt = NULL; 1311 newnp->mcast_oif = tcp_v6_iif(skb); 1312 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1313 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1314 if (np->repflow) 1315 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1316 1317 /* Set ToS of the new socket based upon the value of incoming SYN. */ 1318 if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) 1319 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1320 1321 /* Clone native IPv6 options from listening socket (if any) 1322 1323 Yes, keeping reference count would be much more clever, 1324 but we make one more one thing there: reattach optmem 1325 to newsk. 1326 */ 1327 opt = ireq->ipv6_opt; 1328 if (!opt) 1329 opt = rcu_dereference(np->opt); 1330 if (opt) { 1331 opt = ipv6_dup_options(newsk, opt); 1332 RCU_INIT_POINTER(newnp->opt, opt); 1333 } 1334 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1335 if (opt) 1336 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1337 opt->opt_flen; 1338 1339 tcp_ca_openreq_child(newsk, dst); 1340 1341 tcp_sync_mss(newsk, dst_mtu(dst)); 1342 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1343 1344 tcp_initialize_rcv_mss(newsk); 1345 1346 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1347 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1348 1349 #ifdef CONFIG_TCP_MD5SIG 1350 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1351 1352 /* Copy over the MD5 key from the original socket */ 1353 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1354 if (key) { 1355 /* We're using one, so create a matching key 1356 * on the newsk structure. If we fail to get 1357 * memory, then we end up not copying the key 1358 * across. Shucks. 
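		 * The copy uses a /128 prefix because the child socket's peer
		 * address is fully specified at this point.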
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: the socket is IPv6 but an IPv4 packet arrives,
	   goes to the IPv4 receive handler and is backlogged.
	   From the backlog it always goes here.  Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle such packets correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, this is the only place in our code where we
	   can do it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out.  For now we latch
	   the options received in the last packet enqueued
	   by tcp.  Feel free to propose a better solution.
1440 --ANK (980728) 1441 */ 1442 if (np->rxopt.all) 1443 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1444 1445 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1446 struct dst_entry *dst = sk->sk_rx_dst; 1447 1448 sock_rps_save_rxhash(sk, skb); 1449 sk_mark_napi_id(sk, skb); 1450 if (dst) { 1451 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 1452 dst->ops->check(dst, np->rx_dst_cookie) == NULL) { 1453 dst_release(dst); 1454 sk->sk_rx_dst = NULL; 1455 } 1456 } 1457 1458 tcp_rcv_established(sk, skb); 1459 if (opt_skb) 1460 goto ipv6_pktoptions; 1461 return 0; 1462 } 1463 1464 if (tcp_checksum_complete(skb)) 1465 goto csum_err; 1466 1467 if (sk->sk_state == TCP_LISTEN) { 1468 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1469 1470 if (!nsk) 1471 goto discard; 1472 1473 if (nsk != sk) { 1474 if (tcp_child_process(sk, nsk, skb)) 1475 goto reset; 1476 if (opt_skb) 1477 __kfree_skb(opt_skb); 1478 return 0; 1479 } 1480 } else 1481 sock_rps_save_rxhash(sk, skb); 1482 1483 if (tcp_rcv_state_process(sk, skb)) 1484 goto reset; 1485 if (opt_skb) 1486 goto ipv6_pktoptions; 1487 return 0; 1488 1489 reset: 1490 tcp_v6_send_reset(sk, skb); 1491 discard: 1492 if (opt_skb) 1493 __kfree_skb(opt_skb); 1494 kfree_skb(skb); 1495 return 0; 1496 csum_err: 1497 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1498 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1499 goto discard; 1500 1501 1502 ipv6_pktoptions: 1503 /* Do you ask, what is it? 1504 1505 1. skb was enqueued by tcp. 1506 2. skb is added to tail of read queue, rather than out of order. 1507 3. socket is not in passive state. 1508 4. Finally, it really contains options, which user wants to receive. 1509 */ 1510 tp = tcp_sk(sk); 1511 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1512 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1513 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1514 np->mcast_oif = tcp_v6_iif(opt_skb); 1515 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1516 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1517 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1518 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1519 if (np->repflow) 1520 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1521 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1522 skb_set_owner_r(opt_skb, sk); 1523 tcp_v6_restore_cb(opt_skb); 1524 opt_skb = xchg(&np->pktoptions, opt_skb); 1525 } else { 1526 __kfree_skb(opt_skb); 1527 opt_skb = xchg(&np->pktoptions, NULL); 1528 } 1529 } 1530 1531 kfree_skb(opt_skb); 1532 return 0; 1533 } 1534 1535 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1536 const struct tcphdr *th) 1537 { 1538 /* This is tricky: we move IP6CB at its correct location into 1539 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1540 * _decode_session6() uses IP6CB(). 1541 * barrier() makes sure compiler won't play aliasing games. 
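 * The reverse move is done by tcp_v6_restore_cb() whenever the skb may
 * be handed back to code that expects a valid IP6CB(), e.g. before
 * xfrm6_policy_check() runs again after a fresh socket lookup.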
1542 */ 1543 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1544 sizeof(struct inet6_skb_parm)); 1545 barrier(); 1546 1547 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1548 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1549 skb->len - th->doff*4); 1550 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1551 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1552 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1553 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1554 TCP_SKB_CB(skb)->sacked = 0; 1555 TCP_SKB_CB(skb)->has_rxtstamp = 1556 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1557 } 1558 1559 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1560 { 1561 struct sk_buff *skb_to_free; 1562 int sdif = inet6_sdif(skb); 1563 int dif = inet6_iif(skb); 1564 const struct tcphdr *th; 1565 const struct ipv6hdr *hdr; 1566 bool refcounted; 1567 struct sock *sk; 1568 int ret; 1569 struct net *net = dev_net(skb->dev); 1570 1571 if (skb->pkt_type != PACKET_HOST) 1572 goto discard_it; 1573 1574 /* 1575 * Count it even if it's bad. 1576 */ 1577 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1578 1579 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1580 goto discard_it; 1581 1582 th = (const struct tcphdr *)skb->data; 1583 1584 if (unlikely(th->doff < sizeof(struct tcphdr)/4)) 1585 goto bad_packet; 1586 if (!pskb_may_pull(skb, th->doff*4)) 1587 goto discard_it; 1588 1589 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1590 goto csum_error; 1591 1592 th = (const struct tcphdr *)skb->data; 1593 hdr = ipv6_hdr(skb); 1594 1595 lookup: 1596 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1597 th->source, th->dest, inet6_iif(skb), sdif, 1598 &refcounted); 1599 if (!sk) 1600 goto no_tcp_socket; 1601 1602 process: 1603 if (sk->sk_state == TCP_TIME_WAIT) 1604 goto do_time_wait; 1605 1606 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1607 struct request_sock *req = inet_reqsk(sk); 1608 bool req_stolen = false; 1609 struct sock *nsk; 1610 1611 sk = req->rsk_listener; 1612 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { 1613 sk_drops_add(sk, skb); 1614 reqsk_put(req); 1615 goto discard_it; 1616 } 1617 if (tcp_checksum_complete(skb)) { 1618 reqsk_put(req); 1619 goto csum_error; 1620 } 1621 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1622 inet_csk_reqsk_queue_drop_and_put(sk, req); 1623 goto lookup; 1624 } 1625 sock_hold(sk); 1626 refcounted = true; 1627 nsk = NULL; 1628 if (!tcp_filter(sk, skb)) { 1629 th = (const struct tcphdr *)skb->data; 1630 hdr = ipv6_hdr(skb); 1631 tcp_v6_fill_cb(skb, hdr, th); 1632 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1633 } 1634 if (!nsk) { 1635 reqsk_put(req); 1636 if (req_stolen) { 1637 /* Another cpu got exclusive access to req 1638 * and created a full blown socket. 1639 * Try to feed this packet to this socket 1640 * instead of discarding it. 
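 * Restoring the cb and redoing the lookup below finds that full
 * socket in the established hash.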
1641 */ 1642 tcp_v6_restore_cb(skb); 1643 sock_put(sk); 1644 goto lookup; 1645 } 1646 goto discard_and_relse; 1647 } 1648 if (nsk == sk) { 1649 reqsk_put(req); 1650 tcp_v6_restore_cb(skb); 1651 } else if (tcp_child_process(sk, nsk, skb)) { 1652 tcp_v6_send_reset(nsk, skb); 1653 goto discard_and_relse; 1654 } else { 1655 sock_put(sk); 1656 return 0; 1657 } 1658 } 1659 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { 1660 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1661 goto discard_and_relse; 1662 } 1663 1664 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1665 goto discard_and_relse; 1666 1667 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) 1668 goto discard_and_relse; 1669 1670 if (tcp_filter(sk, skb)) 1671 goto discard_and_relse; 1672 th = (const struct tcphdr *)skb->data; 1673 hdr = ipv6_hdr(skb); 1674 tcp_v6_fill_cb(skb, hdr, th); 1675 1676 skb->dev = NULL; 1677 1678 if (sk->sk_state == TCP_LISTEN) { 1679 ret = tcp_v6_do_rcv(sk, skb); 1680 goto put_and_return; 1681 } 1682 1683 sk_incoming_cpu_update(sk); 1684 1685 bh_lock_sock_nested(sk); 1686 tcp_segs_in(tcp_sk(sk), skb); 1687 ret = 0; 1688 if (!sock_owned_by_user(sk)) { 1689 skb_to_free = sk->sk_rx_skb_cache; 1690 sk->sk_rx_skb_cache = NULL; 1691 ret = tcp_v6_do_rcv(sk, skb); 1692 } else { 1693 if (tcp_add_backlog(sk, skb)) 1694 goto discard_and_relse; 1695 skb_to_free = NULL; 1696 } 1697 bh_unlock_sock(sk); 1698 if (skb_to_free) 1699 __kfree_skb(skb_to_free); 1700 put_and_return: 1701 if (refcounted) 1702 sock_put(sk); 1703 return ret ? -1 : 0; 1704 1705 no_tcp_socket: 1706 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1707 goto discard_it; 1708 1709 tcp_v6_fill_cb(skb, hdr, th); 1710 1711 if (tcp_checksum_complete(skb)) { 1712 csum_error: 1713 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1714 bad_packet: 1715 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1716 } else { 1717 tcp_v6_send_reset(NULL, skb); 1718 } 1719 1720 discard_it: 1721 kfree_skb(skb); 1722 return 0; 1723 1724 discard_and_relse: 1725 sk_drops_add(sk, skb); 1726 if (refcounted) 1727 sock_put(sk); 1728 goto discard_it; 1729 1730 do_time_wait: 1731 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1732 inet_twsk_put(inet_twsk(sk)); 1733 goto discard_it; 1734 } 1735 1736 tcp_v6_fill_cb(skb, hdr, th); 1737 1738 if (tcp_checksum_complete(skb)) { 1739 inet_twsk_put(inet_twsk(sk)); 1740 goto csum_error; 1741 } 1742 1743 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1744 case TCP_TW_SYN: 1745 { 1746 struct sock *sk2; 1747 1748 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1749 skb, __tcp_hdrlen(th), 1750 &ipv6_hdr(skb)->saddr, th->source, 1751 &ipv6_hdr(skb)->daddr, 1752 ntohs(th->dest), 1753 tcp_v6_iif_l3_slave(skb), 1754 sdif); 1755 if (sk2) { 1756 struct inet_timewait_sock *tw = inet_twsk(sk); 1757 inet_twsk_deschedule_put(tw); 1758 sk = sk2; 1759 tcp_v6_restore_cb(skb); 1760 refcounted = false; 1761 goto process; 1762 } 1763 } 1764 /* to ACK */ 1765 fallthrough; 1766 case TCP_TW_ACK: 1767 tcp_v6_timewait_ack(sk, skb); 1768 break; 1769 case TCP_TW_RST: 1770 tcp_v6_send_reset(sk, skb); 1771 inet_twsk_deschedule_put(inet_twsk(sk)); 1772 goto discard_it; 1773 case TCP_TW_SUCCESS: 1774 ; 1775 } 1776 goto discard_it; 1777 } 1778 1779 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) 1780 { 1781 const struct ipv6hdr *hdr; 1782 const struct tcphdr *th; 1783 struct sock *sk; 1784 1785 if (skb->pkt_type != PACKET_HOST) 1786 return; 1787 1788 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 
1789 return; 1790 1791 hdr = ipv6_hdr(skb); 1792 th = tcp_hdr(skb); 1793 1794 if (th->doff < sizeof(struct tcphdr) / 4) 1795 return; 1796 1797 /* Note : We use inet6_iif() here, not tcp_v6_iif() */ 1798 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1799 &hdr->saddr, th->source, 1800 &hdr->daddr, ntohs(th->dest), 1801 inet6_iif(skb), inet6_sdif(skb)); 1802 if (sk) { 1803 skb->sk = sk; 1804 skb->destructor = sock_edemux; 1805 if (sk_fullsock(sk)) { 1806 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1807 1808 if (dst) 1809 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); 1810 if (dst && 1811 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 1812 skb_dst_set_noref(skb, dst); 1813 } 1814 } 1815 } 1816 1817 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1818 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1819 .twsk_unique = tcp_twsk_unique, 1820 .twsk_destructor = tcp_twsk_destructor, 1821 }; 1822 1823 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) 1824 { 1825 struct ipv6_pinfo *np = inet6_sk(sk); 1826 1827 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); 1828 } 1829 1830 const struct inet_connection_sock_af_ops ipv6_specific = { 1831 .queue_xmit = inet6_csk_xmit, 1832 .send_check = tcp_v6_send_check, 1833 .rebuild_header = inet6_sk_rebuild_header, 1834 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1835 .conn_request = tcp_v6_conn_request, 1836 .syn_recv_sock = tcp_v6_syn_recv_sock, 1837 .net_header_len = sizeof(struct ipv6hdr), 1838 .net_frag_header_len = sizeof(struct frag_hdr), 1839 .setsockopt = ipv6_setsockopt, 1840 .getsockopt = ipv6_getsockopt, 1841 .addr2sockaddr = inet6_csk_addr2sockaddr, 1842 .sockaddr_len = sizeof(struct sockaddr_in6), 1843 .mtu_reduced = tcp_v6_mtu_reduced, 1844 }; 1845 1846 #ifdef CONFIG_TCP_MD5SIG 1847 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1848 .md5_lookup = tcp_v6_md5_lookup, 1849 .calc_md5_hash = tcp_v6_md5_hash_skb, 1850 .md5_parse = tcp_v6_parse_md5_keys, 1851 }; 1852 #endif 1853 1854 /* 1855 * TCP over IPv4 via INET6 API 1856 */ 1857 static const struct inet_connection_sock_af_ops ipv6_mapped = { 1858 .queue_xmit = ip_queue_xmit, 1859 .send_check = tcp_v4_send_check, 1860 .rebuild_header = inet_sk_rebuild_header, 1861 .sk_rx_dst_set = inet_sk_rx_dst_set, 1862 .conn_request = tcp_v6_conn_request, 1863 .syn_recv_sock = tcp_v6_syn_recv_sock, 1864 .net_header_len = sizeof(struct iphdr), 1865 .setsockopt = ipv6_setsockopt, 1866 .getsockopt = ipv6_getsockopt, 1867 .addr2sockaddr = inet6_csk_addr2sockaddr, 1868 .sockaddr_len = sizeof(struct sockaddr_in6), 1869 .mtu_reduced = tcp_v4_mtu_reduced, 1870 }; 1871 1872 #ifdef CONFIG_TCP_MD5SIG 1873 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1874 .md5_lookup = tcp_v4_md5_lookup, 1875 .calc_md5_hash = tcp_v4_md5_hash_skb, 1876 .md5_parse = tcp_v6_parse_md5_keys, 1877 }; 1878 #endif 1879 1880 /* NOTE: A lot of things set to zero explicitly by call to 1881 * sk_alloc() so need not be done here. 
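 * tcp_v6_init_sock() below only hooks up the IPv6-specific operation
 * tables; the protocol-independent setup is done by tcp_init_sock().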
1882 */ 1883 static int tcp_v6_init_sock(struct sock *sk) 1884 { 1885 struct inet_connection_sock *icsk = inet_csk(sk); 1886 1887 tcp_init_sock(sk); 1888 1889 icsk->icsk_af_ops = &ipv6_specific; 1890 1891 #ifdef CONFIG_TCP_MD5SIG 1892 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1893 #endif 1894 1895 return 0; 1896 } 1897 1898 static void tcp_v6_destroy_sock(struct sock *sk) 1899 { 1900 tcp_v4_destroy_sock(sk); 1901 inet6_destroy_sock(sk); 1902 } 1903 1904 #ifdef CONFIG_PROC_FS 1905 /* Proc filesystem TCPv6 sock list dumping. */ 1906 static void get_openreq6(struct seq_file *seq, 1907 const struct request_sock *req, int i) 1908 { 1909 long ttd = req->rsk_timer.expires - jiffies; 1910 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1911 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1912 1913 if (ttd < 0) 1914 ttd = 0; 1915 1916 seq_printf(seq, 1917 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1918 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 1919 i, 1920 src->s6_addr32[0], src->s6_addr32[1], 1921 src->s6_addr32[2], src->s6_addr32[3], 1922 inet_rsk(req)->ir_num, 1923 dest->s6_addr32[0], dest->s6_addr32[1], 1924 dest->s6_addr32[2], dest->s6_addr32[3], 1925 ntohs(inet_rsk(req)->ir_rmt_port), 1926 TCP_SYN_RECV, 1927 0, 0, /* could print option size, but that is af dependent. */ 1928 1, /* timers active (only the expire timer) */ 1929 jiffies_to_clock_t(ttd), 1930 req->num_timeout, 1931 from_kuid_munged(seq_user_ns(seq), 1932 sock_i_uid(req->rsk_listener)), 1933 0, /* non standard timer */ 1934 0, /* open_requests have no inode */ 1935 0, req); 1936 } 1937 1938 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 1939 { 1940 const struct in6_addr *dest, *src; 1941 __u16 destp, srcp; 1942 int timer_active; 1943 unsigned long timer_expires; 1944 const struct inet_sock *inet = inet_sk(sp); 1945 const struct tcp_sock *tp = tcp_sk(sp); 1946 const struct inet_connection_sock *icsk = inet_csk(sp); 1947 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1948 int rx_queue; 1949 int state; 1950 1951 dest = &sp->sk_v6_daddr; 1952 src = &sp->sk_v6_rcv_saddr; 1953 destp = ntohs(inet->inet_dport); 1954 srcp = ntohs(inet->inet_sport); 1955 1956 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 1957 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 1958 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 1959 timer_active = 1; 1960 timer_expires = icsk->icsk_timeout; 1961 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 1962 timer_active = 4; 1963 timer_expires = icsk->icsk_timeout; 1964 } else if (timer_pending(&sp->sk_timer)) { 1965 timer_active = 2; 1966 timer_expires = sp->sk_timer.expires; 1967 } else { 1968 timer_active = 0; 1969 timer_expires = jiffies; 1970 } 1971 1972 state = inet_sk_state_load(sp); 1973 if (state == TCP_LISTEN) 1974 rx_queue = READ_ONCE(sp->sk_ack_backlog); 1975 else 1976 /* Because we don't lock the socket, 1977 * we might find a transient negative value. 
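 * The max_t() below clamps it to zero instead of reporting a negative
 * queue size.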
1978 */ 1979 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 1980 READ_ONCE(tp->copied_seq), 0); 1981 1982 seq_printf(seq, 1983 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1984 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 1985 i, 1986 src->s6_addr32[0], src->s6_addr32[1], 1987 src->s6_addr32[2], src->s6_addr32[3], srcp, 1988 dest->s6_addr32[0], dest->s6_addr32[1], 1989 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1990 state, 1991 READ_ONCE(tp->write_seq) - tp->snd_una, 1992 rx_queue, 1993 timer_active, 1994 jiffies_delta_to_clock_t(timer_expires - jiffies), 1995 icsk->icsk_retransmits, 1996 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 1997 icsk->icsk_probes_out, 1998 sock_i_ino(sp), 1999 refcount_read(&sp->sk_refcnt), sp, 2000 jiffies_to_clock_t(icsk->icsk_rto), 2001 jiffies_to_clock_t(icsk->icsk_ack.ato), 2002 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2003 tp->snd_cwnd, 2004 state == TCP_LISTEN ? 2005 fastopenq->max_qlen : 2006 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 2007 ); 2008 } 2009 2010 static void get_timewait6_sock(struct seq_file *seq, 2011 struct inet_timewait_sock *tw, int i) 2012 { 2013 long delta = tw->tw_timer.expires - jiffies; 2014 const struct in6_addr *dest, *src; 2015 __u16 destp, srcp; 2016 2017 dest = &tw->tw_v6_daddr; 2018 src = &tw->tw_v6_rcv_saddr; 2019 destp = ntohs(tw->tw_dport); 2020 srcp = ntohs(tw->tw_sport); 2021 2022 seq_printf(seq, 2023 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2024 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2025 i, 2026 src->s6_addr32[0], src->s6_addr32[1], 2027 src->s6_addr32[2], src->s6_addr32[3], srcp, 2028 dest->s6_addr32[0], dest->s6_addr32[1], 2029 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2030 tw->tw_substate, 0, 0, 2031 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2032 refcount_read(&tw->tw_refcnt), tw); 2033 } 2034 2035 static int tcp6_seq_show(struct seq_file *seq, void *v) 2036 { 2037 struct tcp_iter_state *st; 2038 struct sock *sk = v; 2039 2040 if (v == SEQ_START_TOKEN) { 2041 seq_puts(seq, 2042 " sl " 2043 "local_address " 2044 "remote_address " 2045 "st tx_queue rx_queue tr tm->when retrnsmt" 2046 " uid timeout inode\n"); 2047 goto out; 2048 } 2049 st = seq->private; 2050 2051 if (sk->sk_state == TCP_TIME_WAIT) 2052 get_timewait6_sock(seq, v, st->num); 2053 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2054 get_openreq6(seq, v, st->num); 2055 else 2056 get_tcp6_sock(seq, v, st->num); 2057 out: 2058 return 0; 2059 } 2060 2061 static const struct seq_operations tcp6_seq_ops = { 2062 .show = tcp6_seq_show, 2063 .start = tcp_seq_start, 2064 .next = tcp_seq_next, 2065 .stop = tcp_seq_stop, 2066 }; 2067 2068 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2069 .family = AF_INET6, 2070 }; 2071 2072 int __net_init tcp6_proc_init(struct net *net) 2073 { 2074 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2075 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2076 return -ENOMEM; 2077 return 0; 2078 } 2079 2080 void tcp6_proc_exit(struct net *net) 2081 { 2082 remove_proc_entry("tcp6", net->proc_net); 2083 } 2084 #endif 2085 2086 struct proto tcpv6_prot = { 2087 .name = "TCPv6", 2088 .owner = THIS_MODULE, 2089 .close = tcp_close, 2090 .pre_connect = tcp_v6_pre_connect, 2091 .connect = tcp_v6_connect, 2092 .disconnect = tcp_disconnect, 2093 .accept = inet_csk_accept, 2094 .ioctl = tcp_ioctl, 2095 .init = tcp_v6_init_sock, 2096 .destroy = tcp_v6_destroy_sock, 2097 .shutdown = tcp_shutdown, 2098 .setsockopt = 
tcp_setsockopt, 2099 .getsockopt = tcp_getsockopt, 2100 .keepalive = tcp_set_keepalive, 2101 .recvmsg = tcp_recvmsg, 2102 .sendmsg = tcp_sendmsg, 2103 .sendpage = tcp_sendpage, 2104 .backlog_rcv = tcp_v6_do_rcv, 2105 .release_cb = tcp_release_cb, 2106 .hash = inet6_hash, 2107 .unhash = inet_unhash, 2108 .get_port = inet_csk_get_port, 2109 .enter_memory_pressure = tcp_enter_memory_pressure, 2110 .leave_memory_pressure = tcp_leave_memory_pressure, 2111 .stream_memory_free = tcp_stream_memory_free, 2112 .sockets_allocated = &tcp_sockets_allocated, 2113 .memory_allocated = &tcp_memory_allocated, 2114 .memory_pressure = &tcp_memory_pressure, 2115 .orphan_count = &tcp_orphan_count, 2116 .sysctl_mem = sysctl_tcp_mem, 2117 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2118 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2119 .max_header = MAX_TCP_HEADER, 2120 .obj_size = sizeof(struct tcp6_sock), 2121 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2122 .twsk_prot = &tcp6_timewait_sock_ops, 2123 .rsk_prot = &tcp6_request_sock_ops, 2124 .h.hashinfo = &tcp_hashinfo, 2125 .no_autobind = true, 2126 .diag_destroy = tcp_abort, 2127 }; 2128 EXPORT_SYMBOL_GPL(tcpv6_prot); 2129 2130 /* thinking of making this const? Don't. 2131 * early_demux can change based on sysctl. 2132 */ 2133 static struct inet6_protocol tcpv6_protocol = { 2134 .early_demux = tcp_v6_early_demux, 2135 .early_demux_handler = tcp_v6_early_demux, 2136 .handler = tcp_v6_rcv, 2137 .err_handler = tcp_v6_err, 2138 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 2139 }; 2140 2141 static struct inet_protosw tcpv6_protosw = { 2142 .type = SOCK_STREAM, 2143 .protocol = IPPROTO_TCP, 2144 .prot = &tcpv6_prot, 2145 .ops = &inet6_stream_ops, 2146 .flags = INET_PROTOSW_PERMANENT | 2147 INET_PROTOSW_ICSK, 2148 }; 2149 2150 static int __net_init tcpv6_net_init(struct net *net) 2151 { 2152 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2153 SOCK_RAW, IPPROTO_TCP, net); 2154 } 2155 2156 static void __net_exit tcpv6_net_exit(struct net *net) 2157 { 2158 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2159 } 2160 2161 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) 2162 { 2163 inet_twsk_purge(&tcp_hashinfo, AF_INET6); 2164 } 2165 2166 static struct pernet_operations tcpv6_net_ops = { 2167 .init = tcpv6_net_init, 2168 .exit = tcpv6_net_exit, 2169 .exit_batch = tcpv6_net_exit_batch, 2170 }; 2171 2172 int __init tcpv6_init(void) 2173 { 2174 int ret; 2175 2176 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); 2177 if (ret) 2178 goto out; 2179 2180 /* register inet6 protocol */ 2181 ret = inet6_register_protosw(&tcpv6_protosw); 2182 if (ret) 2183 goto out_tcpv6_protocol; 2184 2185 ret = register_pernet_subsys(&tcpv6_net_ops); 2186 if (ret) 2187 goto out_tcpv6_protosw; 2188 2189 ret = mptcpv6_init(); 2190 if (ret) 2191 goto out_tcpv6_pernet_subsys; 2192 2193 out: 2194 return ret; 2195 2196 out_tcpv6_pernet_subsys: 2197 unregister_pernet_subsys(&tcpv6_net_ops); 2198 out_tcpv6_protosw: 2199 inet6_unregister_protosw(&tcpv6_protosw); 2200 out_tcpv6_protocol: 2201 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); 2202 goto out; 2203 } 2204 2205 void tcpv6_exit(void) 2206 { 2207 unregister_pernet_subsys(&tcpv6_net_ops); 2208 inet6_unregister_protosw(&tcpv6_protosw); 2209 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); 2210 } 2211
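
/*
 * tcpv6_exit() above tears things down in the reverse order of
 * tcpv6_init(): the pernet state is unregistered first, then the protosw
 * entry, and finally the protocol handler itself is removed.
 */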