1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Implementation of the Transmission Control Protocol(TCP). 7 * 8 * IPv4 specific functions 9 * 10 * 11 * code split from: 12 * linux/ipv4/tcp.c 13 * linux/ipv4/tcp_input.c 14 * linux/ipv4/tcp_output.c 15 * 16 * See tcp.c for author information 17 * 18 * This program is free software; you can redistribute it and/or 19 * modify it under the terms of the GNU General Public License 20 * as published by the Free Software Foundation; either version 21 * 2 of the License, or (at your option) any later version. 22 */ 23 24 /* 25 * Changes: 26 * David S. Miller : New socket lookup architecture. 27 * This code is dedicated to John Dyson. 28 * David S. Miller : Change semantics of established hash, 29 * half is devoted to TIME_WAIT sockets 30 * and the rest go in the other half. 31 * Andi Kleen : Add support for syncookies and fixed 32 * some bugs: ip options weren't passed to 33 * the TCP layer, missed a check for an 34 * ACK bit. 35 * Andi Kleen : Implemented fast path mtu discovery. 36 * Fixed many serious bugs in the 37 * request_sock handling and moved 38 * most of it into the af independent code. 39 * Added tail drop and some other bugfixes. 40 * Added new listen semantics. 41 * Mike McLagan : Routing by source 42 * Juan Jose Ciarlante: ip_dynaddr bits 43 * Andi Kleen: various fixes. 44 * Vitaly E. Lavrov : Transparent proxy revived after year 45 * coma. 46 * Andi Kleen : Fix new listen. 47 * Andi Kleen : Fix accept error reporting. 48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 50 * a single port at the same time. 
 */


#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
/* Without MD5 support there is never a key configured, so lookups are NULL. */
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

/* Derive the initial sequence number for a passive connection from the
 * address/port four-tuple of the incoming segment.
 */
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

/* Returns 1 when the TIME-WAIT socket @sktw may be reused for a new
 * connection from @sk; in that case the new socket inherits the old
 * timestamp state and starts its sequence space past the old one.
 */
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		/* Start beyond the old connection's send window (64K + 2),
		 * avoiding 0 which is reserved as "not yet chosen".
		 */
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	__be16 orig_sport, orig_dport;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With IP source routing the first hop differs from the final
	 * destination.
	 */
	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	rt = ip_route_connect(nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if
(!inet->inet_saddr)
		inet->inet_saddr = rt->rt_src;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	/* Re-route now that the source port has been chosen. */
	rt = ip_route_newports(rt, IPPROTO_TCP,
			       orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.
	 */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;		/* route now owned by the socket (sk_setup_caps) */
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * send out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped.
This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code. After adjustment
 * header points to the first 8 bytes of the tcp header. We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	/* iph/th point at the offending packet embedded in the ICMP payload */
	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/* Need the embedded IP header plus at least 8 bytes of TCP header. */
	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* Ignore ICMPs quoting a sequence outside our current send window. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		/* Undo one step of exponential backoff and re-arm the
		 * retransmit timer with the remaining time, if any.
		 */
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
					 icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* Compute the TCP checksum of @skb for the given address pair, either
 * seeding it for hardware offload or summing it in software.
 */
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Hardware finishes the sum; store the pseudo-header seed
		 * and tell it where the result goes.
		 */
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/* Prepare a GSO skb for checksum offload using the addresses from its
 * own IP header.
 */
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		/* No ACK in the offending segment: ACK everything it
		 * consumed (SYN/FIN count for one sequence number each).
		 */
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ?
IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb_dst(skb)->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		/* Echo the peer's timestamp (ts) in a TSopt. */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes after the (optional) timestamp words. */
		int offset = (ts) ?
3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

/* ACK a segment on behalf of a TIME-WAIT socket, then drop the tw ref. */
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}

/* ACK a segment on behalf of a pending connection request (SYN-RECV). */
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route.
	 */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

/* Retransmit a SYN-ACK for @req and account it as a retransmission. */
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/* Warn about a possible SYN flood; the caller rate-limits this via
 * net_ratelimit().
 */
static void syn_flood_warning(const struct sk_buff *skb)
{
	const char *msg;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies)
		msg = "Sending cookies";
	else
#endif
		msg = "Dropping request";

	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
		ntohs(tcp_hdr(skb)->dest), msg);
}

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.
 */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	/* Linear scan of the per-socket key array. */
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

/* Look up the MD5 key for the peer of @addr_sk. */
struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* Look up the MD5 key for the peer of a pending connection request. */
static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list.  Takes ownership of @newkey: it is either
	 * stored in the key table or kfree()d on error.
	 */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one.
		 */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}
		if (tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		if (md5sig->alloced4 == md5sig->entries4) {
			/* Array is full: grow it by one entry. */
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

/* Add an MD5 key for the peer of @addr_sk. */
static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
				 newkey, newkeylen);
}

/* Delete the MD5 key for @addr; returns -ENOENT when no such key exists. */
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
memmove(&tp->md5sig_info->keys4[i], 963 &tp->md5sig_info->keys4[i+1], 964 (tp->md5sig_info->entries4 - i) * 965 sizeof(struct tcp4_md5sig_key)); 966 } 967 tcp_free_md5sig_pool(); 968 return 0; 969 } 970 } 971 return -ENOENT; 972 } 973 EXPORT_SYMBOL(tcp_v4_md5_do_del); 974 975 static void tcp_v4_clear_md5_list(struct sock *sk) 976 { 977 struct tcp_sock *tp = tcp_sk(sk); 978 979 /* Free each key, then the set of key keys, 980 * the crypto element, and then decrement our 981 * hold on the last resort crypto. 982 */ 983 if (tp->md5sig_info->entries4) { 984 int i; 985 for (i = 0; i < tp->md5sig_info->entries4; i++) 986 kfree(tp->md5sig_info->keys4[i].base.key); 987 tp->md5sig_info->entries4 = 0; 988 tcp_free_md5sig_pool(); 989 } 990 if (tp->md5sig_info->keys4) { 991 kfree(tp->md5sig_info->keys4); 992 tp->md5sig_info->keys4 = NULL; 993 tp->md5sig_info->alloced4 = 0; 994 } 995 } 996 997 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 998 int optlen) 999 { 1000 struct tcp_md5sig cmd; 1001 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 1002 u8 *newkey; 1003 1004 if (optlen < sizeof(cmd)) 1005 return -EINVAL; 1006 1007 if (copy_from_user(&cmd, optval, sizeof(cmd))) 1008 return -EFAULT; 1009 1010 if (sin->sin_family != AF_INET) 1011 return -EINVAL; 1012 1013 if (!cmd.tcpm_key || !cmd.tcpm_keylen) { 1014 if (!tcp_sk(sk)->md5sig_info) 1015 return -ENOENT; 1016 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); 1017 } 1018 1019 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1020 return -EINVAL; 1021 1022 if (!tcp_sk(sk)->md5sig_info) { 1023 struct tcp_sock *tp = tcp_sk(sk); 1024 struct tcp_md5sig_info *p; 1025 1026 p = kzalloc(sizeof(*p), sk->sk_allocation); 1027 if (!p) 1028 return -EINVAL; 1029 1030 tp->md5sig_info = p; 1031 sk_nocaps_add(sk, NETIF_F_GSO_MASK); 1032 } 1033 1034 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); 1035 if (!newkey) 1036 return -ENOMEM; 1037 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, 
1038 newkey, cmd.tcpm_keylen); 1039 } 1040 1041 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 1042 __be32 daddr, __be32 saddr, int nbytes) 1043 { 1044 struct tcp4_pseudohdr *bp; 1045 struct scatterlist sg; 1046 1047 bp = &hp->md5_blk.ip4; 1048 1049 /* 1050 * 1. the TCP pseudo-header (in the order: source IP address, 1051 * destination IP address, zero-padded protocol number, and 1052 * segment length) 1053 */ 1054 bp->saddr = saddr; 1055 bp->daddr = daddr; 1056 bp->pad = 0; 1057 bp->protocol = IPPROTO_TCP; 1058 bp->len = cpu_to_be16(nbytes); 1059 1060 sg_init_one(&sg, bp, sizeof(*bp)); 1061 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 1062 } 1063 1064 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, 1065 __be32 daddr, __be32 saddr, struct tcphdr *th) 1066 { 1067 struct tcp_md5sig_pool *hp; 1068 struct hash_desc *desc; 1069 1070 hp = tcp_get_md5sig_pool(); 1071 if (!hp) 1072 goto clear_hash_noput; 1073 desc = &hp->md5_desc; 1074 1075 if (crypto_hash_init(desc)) 1076 goto clear_hash; 1077 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 1078 goto clear_hash; 1079 if (tcp_md5_hash_header(hp, th)) 1080 goto clear_hash; 1081 if (tcp_md5_hash_key(hp, key)) 1082 goto clear_hash; 1083 if (crypto_hash_final(desc, md5_hash)) 1084 goto clear_hash; 1085 1086 tcp_put_md5sig_pool(); 1087 return 0; 1088 1089 clear_hash: 1090 tcp_put_md5sig_pool(); 1091 clear_hash_noput: 1092 memset(md5_hash, 0, 16); 1093 return 1; 1094 } 1095 1096 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, 1097 struct sock *sk, struct request_sock *req, 1098 struct sk_buff *skb) 1099 { 1100 struct tcp_md5sig_pool *hp; 1101 struct hash_desc *desc; 1102 struct tcphdr *th = tcp_hdr(skb); 1103 __be32 saddr, daddr; 1104 1105 if (sk) { 1106 saddr = inet_sk(sk)->inet_saddr; 1107 daddr = inet_sk(sk)->inet_daddr; 1108 } else if (req) { 1109 saddr = inet_rsk(req)->loc_addr; 1110 daddr = inet_rsk(req)->rmt_addr; 1111 } 
else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	/* Zero the output on any failure so callers never use a stale hash. */
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

/* Check the RFC2385 MD5 option on an incoming segment against the key
 * configured for the source address; returns 1 when the segment must be
 * dropped, 0 when it may be processed.
 */
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash?
 */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

/* Operations used for IPv4 connection requests (SYN-RECV handling). */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
/* MD5-specific request_sock hooks for IPv4. */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	/* TCP Cookie Transactions: mix our generated cookie with the
	 * peer-supplied one (carried in the option pointed to by
	 * hash_location) when both sides participate.
	 */
	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

#ifdef CONFIG_SYN_COOKIES
		want_cookie = 0;	/* not our kind of cookie */
#endif
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		/* Cookie required by this socket but absent in the SYN. */
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->daddr.addr.a4 == saddr) {
			inet_peer_refcheck(peer);
			/* PAWS: reject when the peer's cached timestamp is
			 * fresh and ahead of the one carried in this SYN.
			 */
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	/* For a cookie answer the request_sock is not kept around;
	 * the SYN-ACK alone carries all needed state.
	 */
	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
*/
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	/* Copy addressing and IP options from the request into the child;
	 * ownership of ireq->opt transfers to the new socket.
	 */
	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		/* MD5 segments cannot be offloaded/segmented in hardware. */
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		sock_put(newsk);
		goto exit;
	}
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/*
 * Resolve the socket an incoming segment on a listener belongs to:
 * a pending request_sock, an already-established child, or (with
 * syncookies) a child reconstructed from the cookie.  Returns NULL to
 * discard, a locked child socket, or the listener itself.
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/*
 * Initialise/verify the TCP checksum of an incoming segment.  Returns
 * non-zero when the packet is provably corrupt.
 */
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	/* Seed skb->csum with the pseudo-header; full verification of
	 * longer packets is deferred (see tcp_checksum_complete()).
	 */
	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if
/* (review) continuation of tcp_v4_checksum_init(): the "if" keyword is
 * on the previous line.  Short packets (<= 76 bytes) are checksummed
 * in full right away; larger ones are verified later.
 */
	    (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}


/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* nsk != sk: segment belongs to a (new) child socket. */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

/*
 *	From tcp_input.c
 */

/*
 * Main IPv4 TCP receive entry point (called from the IP layer in
 * softirq context).  Validates the header and checksum, fills in the
 * TCP control block, looks up the owning socket and delivers the
 * segment (directly, via prequeue, or via the socket backlog).
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	/* Re-read headers: pskb_may_pull() may have reallocated. */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = iph->tos;
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	/* Generalized TTL security mechanism (per-socket min_ttl). */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

/* (review) continuation of tcp_v4_rcv(): socket located and filtered;
 * take the socket spinlock and deliver or queue the segment.
 */
	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		/* Backlog limit exceeded: drop and count. */
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* A new SYN may legitimately reuse a TIME-WAIT pair:
		 * hand it to a current listener, killing the tw bucket.
		 */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

/*
 * Return the inet_peer entry for this connection's destination.
 * *release_it tells the caller whether it owns a reference that must
 * be dropped (true when the peer did not come from the cached route).
 */
struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
{
	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct inet_peer *peer;

	if (!rt || rt->rt_dst != inet->inet_daddr) {
		peer = inet_getpeer_v4(inet->inet_daddr, 1);
		*release_it = true;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
		*release_it = false;
	}

	return peer;
}
EXPORT_SYMBOL(tcp_v4_get_peer);

/* As above, but for a TIME-WAIT socket; always returns a fresh ref. */
void *tcp_v4_tw_get_peer(struct sock *sk)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);

	return inet_getpeer_v4(tw->tw_daddr, 1);
}
EXPORT_SYMBOL(tcp_v4_tw_get_peer);

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v4_tw_get_peer,
};

/* Address-family operations used by TCP over plain IPv4. */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.get_peer	   = tcp_v4_get_peer,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
/* MD5 hooks for full (non-request) IPv4 TCP sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup		= tcp_v4_md5_lookup,
	.calc_md5_hash		= tcp_v4_md5_hash_skb,
	.md5_add		= tcp_v4_md5_add_func,
	.md5_parse
/* (review) continuation of the tcp_sock_ipv4_specific initializer:
 * the ".md5_parse" member name is on the previous line.
 */
				= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		/* Allocation failure is tolerated: cookies simply stay off. */
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}

/* Release all TCP-private resources when a socket is destroyed. */
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	percpu_counter_dec(&tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/* First TIME-WAIT socket in a (nulls) hash chain, or NULL if empty. */
static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

/* Successor of tw in its chain, or NULL at the (nulls) end marker. */
static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
*/
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		/* Fresh start: lock the current bucket and walk it. */
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is a request_sock hanging off st->syn_wait_sk;
		 * continue through its listener's SYN table.
		 */
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* SYN table exhausted: resume with the next listener. */
		sk	  = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			/* Descend into this listener's pending requests;
			 * syn_wait_lock stays held across iterations
			 * (released in tcp_seq_stop or above).
			 */
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

/* Skip *pos entries into the listening-socket walk. */
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

/* True when the current ehash bucket has neither established nor
 * TIME-WAIT entries.
 */
static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
*/
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		/* On success we return with the bucket lock held; it is
		 * released by established_get_next()/tcp_seq_stop().
		 */
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

/* Advance the established/TIME-WAIT walk past cur. */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for next non empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	/* Established chain exhausted: fall over to this bucket's
	 * TIME-WAIT chain.
	 */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

/* Skip pos entries into the established-socket walk. */
static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

/* Position the iterator at absolute index pos, trying the listening
 * table first, then the established table.
 */
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

/* Fast re-seek to the position reached on the previous read, using the
 * cached bucket/offset instead of rewalking from index zero.
 */
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
/* (review) continuation of tcp_seek_last_pos(): the ehash bound check
 * ("if (st->bucket > tcp_hashinfo.ehash_mask)") is on the previous line.
 */
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

/* seq_file start: resume quickly at the cached position if the caller
 * continues where the last read left off, otherwise rewalk from *pos.
 */
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

/* seq_file next: advance the walk appropriate to the current state. */
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

/* seq_file stop: drop whichever lock the walk currently holds. */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* fall through: the listening bucket lock is held in
		 * OPENREQ state as well and must be released too.
		 */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

/* Open handler shared by /proc/net/tcp-style files. */
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			  sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family		= afinfo->family;
	s->last_pos 		= 0;
	return 0;
}

/* Register a per-netns /proc/net entry described by afinfo. */
int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);

/* Format one SYN_RECV request_sock line for /proc/net/tcp; *len gets
 * the number of characters written (for column padding by the caller).
 */
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent.
*/ 2370 1, /* timers active (only the expire timer) */ 2371 jiffies_to_clock_t(ttd), 2372 req->retrans, 2373 uid, 2374 0, /* non standard timer */ 2375 0, /* open_requests have no inode */ 2376 atomic_read(&sk->sk_refcnt), 2377 req, 2378 len); 2379 } 2380 2381 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) 2382 { 2383 int timer_active; 2384 unsigned long timer_expires; 2385 struct tcp_sock *tp = tcp_sk(sk); 2386 const struct inet_connection_sock *icsk = inet_csk(sk); 2387 struct inet_sock *inet = inet_sk(sk); 2388 __be32 dest = inet->inet_daddr; 2389 __be32 src = inet->inet_rcv_saddr; 2390 __u16 destp = ntohs(inet->inet_dport); 2391 __u16 srcp = ntohs(inet->inet_sport); 2392 int rx_queue; 2393 2394 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 2395 timer_active = 1; 2396 timer_expires = icsk->icsk_timeout; 2397 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2398 timer_active = 4; 2399 timer_expires = icsk->icsk_timeout; 2400 } else if (timer_pending(&sk->sk_timer)) { 2401 timer_active = 2; 2402 timer_expires = sk->sk_timer.expires; 2403 } else { 2404 timer_active = 0; 2405 timer_expires = jiffies; 2406 } 2407 2408 if (sk->sk_state == TCP_LISTEN) 2409 rx_queue = sk->sk_ack_backlog; 2410 else 2411 /* 2412 * because we dont lock socket, we might find a transient negative value 2413 */ 2414 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2415 2416 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2417 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", 2418 i, src, srcp, dest, destp, sk->sk_state, 2419 tp->write_seq - tp->snd_una, 2420 rx_queue, 2421 timer_active, 2422 jiffies_to_clock_t(timer_expires - jiffies), 2423 icsk->icsk_retransmits, 2424 sock_i_uid(sk), 2425 icsk->icsk_probes_out, 2426 sock_i_ino(sk), 2427 atomic_read(&sk->sk_refcnt), sk, 2428 jiffies_to_clock_t(icsk->icsk_rto), 2429 jiffies_to_clock_t(icsk->icsk_ack.ato), 2430 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2431 tp->snd_cwnd, 
2432 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh, 2433 len); 2434 } 2435 2436 static void get_timewait4_sock(struct inet_timewait_sock *tw, 2437 struct seq_file *f, int i, int *len) 2438 { 2439 __be32 dest, src; 2440 __u16 destp, srcp; 2441 int ttd = tw->tw_ttd - jiffies; 2442 2443 if (ttd < 0) 2444 ttd = 0; 2445 2446 dest = tw->tw_daddr; 2447 src = tw->tw_rcv_saddr; 2448 destp = ntohs(tw->tw_dport); 2449 srcp = ntohs(tw->tw_sport); 2450 2451 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2452 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n", 2453 i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2454 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 2455 atomic_read(&tw->tw_refcnt), tw, len); 2456 } 2457 2458 #define TMPSZ 150 2459 2460 static int tcp4_seq_show(struct seq_file *seq, void *v) 2461 { 2462 struct tcp_iter_state *st; 2463 int len; 2464 2465 if (v == SEQ_START_TOKEN) { 2466 seq_printf(seq, "%-*s\n", TMPSZ - 1, 2467 " sl local_address rem_address st tx_queue " 2468 "rx_queue tr tm->when retrnsmt uid timeout " 2469 "inode"); 2470 goto out; 2471 } 2472 st = seq->private; 2473 2474 switch (st->state) { 2475 case TCP_SEQ_STATE_LISTENING: 2476 case TCP_SEQ_STATE_ESTABLISHED: 2477 get_tcp4_sock(v, seq, st->num, &len); 2478 break; 2479 case TCP_SEQ_STATE_OPENREQ: 2480 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); 2481 break; 2482 case TCP_SEQ_STATE_TIME_WAIT: 2483 get_timewait4_sock(v, seq, st->num, &len); 2484 break; 2485 } 2486 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); 2487 out: 2488 return 0; 2489 } 2490 2491 static struct tcp_seq_afinfo tcp4_seq_afinfo = { 2492 .name = "tcp", 2493 .family = AF_INET, 2494 .seq_fops = { 2495 .owner = THIS_MODULE, 2496 }, 2497 .seq_ops = { 2498 .show = tcp4_seq_show, 2499 }, 2500 }; 2501 2502 static int __net_init tcp4_proc_init_net(struct net *net) 2503 { 2504 return tcp_proc_register(net, &tcp4_seq_afinfo); 2505 } 2506 2507 static void __net_exit tcp4_proc_exit_net(struct net *net) 2508 { 2509 
tcp_proc_unregister(net, &tcp4_seq_afinfo); 2510 } 2511 2512 static struct pernet_operations tcp4_net_ops = { 2513 .init = tcp4_proc_init_net, 2514 .exit = tcp4_proc_exit_net, 2515 }; 2516 2517 int __init tcp4_proc_init(void) 2518 { 2519 return register_pernet_subsys(&tcp4_net_ops); 2520 } 2521 2522 void tcp4_proc_exit(void) 2523 { 2524 unregister_pernet_subsys(&tcp4_net_ops); 2525 } 2526 #endif /* CONFIG_PROC_FS */ 2527 2528 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2529 { 2530 struct iphdr *iph = skb_gro_network_header(skb); 2531 2532 switch (skb->ip_summed) { 2533 case CHECKSUM_COMPLETE: 2534 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 2535 skb->csum)) { 2536 skb->ip_summed = CHECKSUM_UNNECESSARY; 2537 break; 2538 } 2539 2540 /* fall through */ 2541 case CHECKSUM_NONE: 2542 NAPI_GRO_CB(skb)->flush = 1; 2543 return NULL; 2544 } 2545 2546 return tcp_gro_receive(head, skb); 2547 } 2548 2549 int tcp4_gro_complete(struct sk_buff *skb) 2550 { 2551 struct iphdr *iph = ip_hdr(skb); 2552 struct tcphdr *th = tcp_hdr(skb); 2553 2554 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2555 iph->saddr, iph->daddr, 0); 2556 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 2557 2558 return tcp_gro_complete(skb); 2559 } 2560 2561 struct proto tcp_prot = { 2562 .name = "TCP", 2563 .owner = THIS_MODULE, 2564 .close = tcp_close, 2565 .connect = tcp_v4_connect, 2566 .disconnect = tcp_disconnect, 2567 .accept = inet_csk_accept, 2568 .ioctl = tcp_ioctl, 2569 .init = tcp_v4_init_sock, 2570 .destroy = tcp_v4_destroy_sock, 2571 .shutdown = tcp_shutdown, 2572 .setsockopt = tcp_setsockopt, 2573 .getsockopt = tcp_getsockopt, 2574 .recvmsg = tcp_recvmsg, 2575 .sendmsg = tcp_sendmsg, 2576 .sendpage = tcp_sendpage, 2577 .backlog_rcv = tcp_v4_do_rcv, 2578 .hash = inet_hash, 2579 .unhash = inet_unhash, 2580 .get_port = inet_csk_get_port, 2581 .enter_memory_pressure = tcp_enter_memory_pressure, 2582 .sockets_allocated = 
&tcp_sockets_allocated, 2583 .orphan_count = &tcp_orphan_count, 2584 .memory_allocated = &tcp_memory_allocated, 2585 .memory_pressure = &tcp_memory_pressure, 2586 .sysctl_mem = sysctl_tcp_mem, 2587 .sysctl_wmem = sysctl_tcp_wmem, 2588 .sysctl_rmem = sysctl_tcp_rmem, 2589 .max_header = MAX_TCP_HEADER, 2590 .obj_size = sizeof(struct tcp_sock), 2591 .slab_flags = SLAB_DESTROY_BY_RCU, 2592 .twsk_prot = &tcp_timewait_sock_ops, 2593 .rsk_prot = &tcp_request_sock_ops, 2594 .h.hashinfo = &tcp_hashinfo, 2595 .no_autobind = true, 2596 #ifdef CONFIG_COMPAT 2597 .compat_setsockopt = compat_tcp_setsockopt, 2598 .compat_getsockopt = compat_tcp_getsockopt, 2599 #endif 2600 }; 2601 EXPORT_SYMBOL(tcp_prot); 2602 2603 2604 static int __net_init tcp_sk_init(struct net *net) 2605 { 2606 return inet_ctl_sock_create(&net->ipv4.tcp_sock, 2607 PF_INET, SOCK_RAW, IPPROTO_TCP, net); 2608 } 2609 2610 static void __net_exit tcp_sk_exit(struct net *net) 2611 { 2612 inet_ctl_sock_destroy(net->ipv4.tcp_sock); 2613 } 2614 2615 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2616 { 2617 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2618 } 2619 2620 static struct pernet_operations __net_initdata tcp_sk_ops = { 2621 .init = tcp_sk_init, 2622 .exit = tcp_sk_exit, 2623 .exit_batch = tcp_sk_exit_batch, 2624 }; 2625 2626 void __init tcp_v4_init(void) 2627 { 2628 inet_hashinfo_init(&tcp_hashinfo); 2629 if (register_pernet_subsys(&tcp_sk_ops)) 2630 panic("Failed to create the TCP control socket.\n"); 2631 } 2632