1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 
54 #include <linux/bottom_half.h>
55 #include <linux/types.h>
56 #include <linux/fcntl.h>
57 #include <linux/module.h>
58 #include <linux/random.h>
59 #include <linux/cache.h>
60 #include <linux/jhash.h>
61 #include <linux/init.h>
62 #include <linux/times.h>
63 
64 #include <net/net_namespace.h>
65 #include <net/icmp.h>
66 #include <net/inet_hashtables.h>
67 #include <net/tcp.h>
68 #include <net/transp_v6.h>
69 #include <net/ipv6.h>
70 #include <net/inet_common.h>
71 #include <net/timewait_sock.h>
72 #include <net/xfrm.h>
73 #include <net/netdma.h>
74 
75 #include <linux/inet.h>
76 #include <linux/ipv6.h>
77 #include <linux/stddef.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 
81 #include <linux/crypto.h>
82 #include <linux/scatterlist.h>
83 
84 int sysctl_tcp_tw_reuse __read_mostly;
85 int sysctl_tcp_low_latency __read_mostly;
86 
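/*
 * Both knobs above are runtime-tunable via sysctl; they are normally
 * exposed as net.ipv4.tcp_tw_reuse and net.ipv4.tcp_low_latency
 * (i.e. /proc/sys/net/ipv4/tcp_tw_reuse and
 * /proc/sys/net/ipv4/tcp_low_latency), following the usual net/ipv4
 * sysctl naming.
 */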
87 
88 #ifdef CONFIG_TCP_MD5SIG
89 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
90 						   __be32 addr);
91 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
92 			       __be32 daddr, __be32 saddr, struct tcphdr *th);
93 #else
94 static inline
95 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
96 {
97 	return NULL;
98 }
99 #endif
100 
101 struct inet_hashinfo tcp_hashinfo;
102 
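/*
 * Initial sequence numbers are derived (in secure_tcp_sequence_number())
 * from a keyed hash over the connection 4-tuple plus a clocked component,
 * in the spirit of RFC 1948, so ISNs are hard for an off-path attacker to
 * predict while still advancing monotonically for a given 4-tuple.
 */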
103 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
104 {
105 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
106 					  ip_hdr(skb)->saddr,
107 					  tcp_hdr(skb)->dest,
108 					  tcp_hdr(skb)->source);
109 }
110 
111 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
112 {
113 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
114 	struct tcp_sock *tp = tcp_sk(sk);
115 
116 	/* With PAWS, it is safe from the viewpoint
117 	   of data integrity. Even without PAWS it is safe provided sequence
118 	   spaces do not overlap, i.e. at data rates <= 80Mbit/sec.
119 
120 	   Actually, the idea is close to VJ's: the timestamp cache is
121 	   held not per host but per port pair, and the TW bucket is used
122 	   as the state holder.
123 
124 	   If the TW bucket has already been destroyed we fall back to VJ's
125 	   scheme and use the initial timestamp retrieved from the peer table.
126 	 */
127 	if (tcptw->tw_ts_recent_stamp &&
128 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
129 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
130 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
131 		if (tp->write_seq == 0)
132 			tp->write_seq = 1;
133 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
134 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
135 		sock_hold(sktw);
136 		return 1;
137 	}
138 
139 	return 0;
140 }
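
/*
 * A worked example of the reuse path above (numbers purely illustrative):
 * if the old TIME-WAIT connection ended with tw_snd_nxt == 1000, the new
 * connection starts at write_seq = 1000 + 65535 + 2 = 66537, i.e. just
 * beyond the largest possible unscaled receive window of the old
 * incarnation, so stray old segments cannot be mistaken for new data.
 */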
141 
142 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
143 
144 /* This will initiate an outgoing connection. */
145 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
146 {
147 	struct inet_sock *inet = inet_sk(sk);
148 	struct tcp_sock *tp = tcp_sk(sk);
149 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
150 	struct rtable *rt;
151 	__be32 daddr, nexthop;
152 	int tmp;
153 	int err;
154 
155 	if (addr_len < sizeof(struct sockaddr_in))
156 		return -EINVAL;
157 
158 	if (usin->sin_family != AF_INET)
159 		return -EAFNOSUPPORT;
160 
161 	nexthop = daddr = usin->sin_addr.s_addr;
162 	if (inet->opt && inet->opt->srr) {
163 		if (!daddr)
164 			return -EINVAL;
165 		nexthop = inet->opt->faddr;
166 	}
167 
168 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
169 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
170 			       IPPROTO_TCP,
171 			       inet->sport, usin->sin_port, sk, 1);
172 	if (tmp < 0) {
173 		if (tmp == -ENETUNREACH)
174 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
175 		return tmp;
176 	}
177 
178 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
179 		ip_rt_put(rt);
180 		return -ENETUNREACH;
181 	}
182 
183 	if (!inet->opt || !inet->opt->srr)
184 		daddr = rt->rt_dst;
185 
186 	if (!inet->saddr)
187 		inet->saddr = rt->rt_src;
188 	inet->rcv_saddr = inet->saddr;
189 
190 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
191 		/* Reset inherited state */
192 		tp->rx_opt.ts_recent	   = 0;
193 		tp->rx_opt.ts_recent_stamp = 0;
194 		tp->write_seq		   = 0;
195 	}
196 
197 	if (tcp_death_row.sysctl_tw_recycle &&
198 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
199 		struct inet_peer *peer = rt_get_peer(rt);
200 		/*
201 		 * VJ's idea. We save the last timestamp seen from
202 		 * the destination in the peer table when entering state
203 		 * TIME-WAIT, and initialize rx_opt.ts_recent from it
204 		 * when trying a new connection.
205 		 */
206 		if (peer != NULL &&
207 		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
208 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
209 			tp->rx_opt.ts_recent = peer->tcp_ts;
210 		}
211 	}
212 
213 	inet->dport = usin->sin_port;
214 	inet->daddr = daddr;
215 
216 	inet_csk(sk)->icsk_ext_hdr_len = 0;
217 	if (inet->opt)
218 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
219 
220 	tp->rx_opt.mss_clamp = 536;
221 
222 	/* Socket identity is still unknown (sport may be zero).
223 	 * However we set state to SYN-SENT and, without releasing the socket
224 	 * lock, select a source port, enter ourselves into the hash tables and
225 	 * complete initialization after this.
226 	 */
227 	tcp_set_state(sk, TCP_SYN_SENT);
228 	err = inet_hash_connect(&tcp_death_row, sk);
229 	if (err)
230 		goto failure;
231 
232 	err = ip_route_newports(&rt, IPPROTO_TCP,
233 				inet->sport, inet->dport, sk);
234 	if (err)
235 		goto failure;
236 
237 	/* OK, now commit destination to socket.  */
238 	sk->sk_gso_type = SKB_GSO_TCPV4;
239 	sk_setup_caps(sk, &rt->u.dst);
240 
241 	if (!tp->write_seq)
242 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
243 							   inet->daddr,
244 							   inet->sport,
245 							   usin->sin_port);
246 
247 	inet->id = tp->write_seq ^ jiffies;
248 
249 	err = tcp_connect(sk);
250 	rt = NULL;
251 	if (err)
252 		goto failure;
253 
254 	return 0;
255 
256 failure:
257 	/*
258 	 * This unhashes the socket and releases the local port,
259 	 * if necessary.
260 	 */
261 	tcp_set_state(sk, TCP_CLOSE);
262 	ip_rt_put(rt);
263 	sk->sk_route_caps = 0;
264 	inet->dport = 0;
265 	return err;
266 }
267 
268 /*
269  * This routine does path mtu discovery as defined in RFC1191.
270  */
271 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
272 {
273 	struct dst_entry *dst;
274 	struct inet_sock *inet = inet_sk(sk);
275 
276 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
277 	 * sent out by Linux are always < 576 bytes, so they should go through
278 	 * unfragmented).
279 	 */
280 	if (sk->sk_state == TCP_LISTEN)
281 		return;
282 
283 	/* We don't check in the dst entry if pmtu discovery is forbidden
284 	 * on this route. We just assume that no packet-too-big messages
285 	 * are sent back when pmtu discovery is not active.
286 	 * There is a small race when the user changes this flag in the
287 	 * route, but I think that's acceptable.
288 	 */
289 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
290 		return;
291 
292 	dst->ops->update_pmtu(dst, mtu);
293 
294 	/* Something is about to go wrong... Remember the soft error
295 	 * in case this connection is not able to recover.
296 	 */
297 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
298 		sk->sk_err_soft = EMSGSIZE;
299 
300 	mtu = dst_mtu(dst);
301 
302 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
303 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
304 		tcp_sync_mss(sk, mtu);
305 
306 		/* Resend the TCP packet because it's
307 		 * clear that the old packet has been
308 		 * dropped. This is the new "fast" path mtu
309 		 * discovery.
310 		 */
311 		tcp_simple_retransmit(sk);
312 	} /* else let the usual retransmit timer handle it */
313 }
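
/*
 * A rough worked example of the path above (figures illustrative only):
 * if an ICMP "fragmentation needed" reports an MTU of 1400 on a path that
 * had been using 1500, tcp_sync_mss() ends up clamping the MSS to about
 * 1400 - 20 (IP header) - 20 (TCP header) = 1360 bytes, before TCP options
 * are accounted for.
 */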
314 
315 /*
316  * This routine is called by the ICMP module when it gets some
317  * sort of error condition.  If err < 0 then the socket should
318  * be closed and the error returned to the user.  If err > 0
319  * it's just the ICMP type << 8 | ICMP code.  After adjustment
320  * header points to the first 8 bytes of the TCP header.  We need
321  * to find the appropriate port.
322  *
323  * The locking strategy used here is very "optimistic". When
324  * someone else accesses the socket, the ICMP is just dropped
325  * and for some paths there is no check at all.
326  * A more general error queue to queue errors for later handling
327  * is probably better.
328  *
329  */
330 
331 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
332 {
333 	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
334 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
335 	struct inet_connection_sock *icsk;
336 	struct tcp_sock *tp;
337 	struct inet_sock *inet;
338 	const int type = icmp_hdr(icmp_skb)->type;
339 	const int code = icmp_hdr(icmp_skb)->code;
340 	struct sock *sk;
341 	struct sk_buff *skb;
342 	__u32 seq;
343 	__u32 remaining;
344 	int err;
345 	struct net *net = dev_net(icmp_skb->dev);
346 
347 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
348 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
349 		return;
350 	}
351 
352 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
353 			iph->saddr, th->source, inet_iif(icmp_skb));
354 	if (!sk) {
355 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
356 		return;
357 	}
358 	if (sk->sk_state == TCP_TIME_WAIT) {
359 		inet_twsk_put(inet_twsk(sk));
360 		return;
361 	}
362 
363 	bh_lock_sock(sk);
364 	/* If too many ICMPs get dropped on busy
365 	 * servers this needs to be solved differently.
366 	 */
367 	if (sock_owned_by_user(sk))
368 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
369 
370 	if (sk->sk_state == TCP_CLOSE)
371 		goto out;
372 
373 	icsk = inet_csk(sk);
374 	tp = tcp_sk(sk);
375 	seq = ntohl(th->seq);
376 	if (sk->sk_state != TCP_LISTEN &&
377 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
378 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
379 		goto out;
380 	}
381 
382 	switch (type) {
383 	case ICMP_SOURCE_QUENCH:
384 		/* Just silently ignore these. */
385 		goto out;
386 	case ICMP_PARAMETERPROB:
387 		err = EPROTO;
388 		break;
389 	case ICMP_DEST_UNREACH:
390 		if (code > NR_ICMP_UNREACH)
391 			goto out;
392 
393 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
394 			if (!sock_owned_by_user(sk))
395 				do_pmtu_discovery(sk, iph, info);
396 			goto out;
397 		}
398 
399 		err = icmp_err_convert[code].errno;
400 		/* check if icmp_skb allows revert of backoff
401 		 * (see draft-zimmermann-tcp-lcd) */
402 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
403 			break;
404 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
405 		    !icsk->icsk_backoff)
406 			break;
407 
408 		icsk->icsk_backoff--;
409 		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
410 					 icsk->icsk_backoff;
411 		tcp_bound_rto(sk);
412 
413 		skb = tcp_write_queue_head(sk);
414 		BUG_ON(!skb);
415 
416 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
417 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
418 
419 		if (remaining) {
420 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
421 						  remaining, TCP_RTO_MAX);
422 		} else if (sock_owned_by_user(sk)) {
423 			/* RTO revert clocked out retransmission,
424 			 * but socket is locked. Will defer. */
425 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
426 						  HZ/20, TCP_RTO_MAX);
427 		} else {
428 			/* RTO revert clocked out retransmission.
429 			 * Will retransmit now */
430 			tcp_retransmit_timer(sk);
431 		}
432 
433 		break;
434 	case ICMP_TIME_EXCEEDED:
435 		err = EHOSTUNREACH;
436 		break;
437 	default:
438 		goto out;
439 	}
440 
441 	switch (sk->sk_state) {
442 		struct request_sock *req, **prev;
443 	case TCP_LISTEN:
444 		if (sock_owned_by_user(sk))
445 			goto out;
446 
447 		req = inet_csk_search_req(sk, &prev, th->dest,
448 					  iph->daddr, iph->saddr);
449 		if (!req)
450 			goto out;
451 
452 		/* ICMPs are not backlogged, hence we cannot get
453 		   an established socket here.
454 		 */
455 		WARN_ON(req->sk);
456 
457 		if (seq != tcp_rsk(req)->snt_isn) {
458 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
459 			goto out;
460 		}
461 
462 		/*
463 		 * Still in SYN_RECV, just remove it silently.
464 		 * There is no good way to pass the error to the newly
465 		 * created socket, and POSIX does not want network
466 		 * errors returned from accept().
467 		 */
468 		inet_csk_reqsk_queue_drop(sk, req, prev);
469 		goto out;
470 
471 	case TCP_SYN_SENT:
472 	case TCP_SYN_RECV:  /* Cannot happen normally.
473 			       It can occur, e.g., if SYNs crossed.
474 			     */
475 		if (!sock_owned_by_user(sk)) {
476 			sk->sk_err = err;
477 
478 			sk->sk_error_report(sk);
479 
480 			tcp_done(sk);
481 		} else {
482 			sk->sk_err_soft = err;
483 		}
484 		goto out;
485 	}
486 
487 	/* If we've already connected we will keep trying
488 	 * until we time out, or the user gives up.
489 	 *
490 	 * RFC 1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH
491 	 * to be considered hard errors (well, FRAG_FAILED too,
492 	 * but it is obsoleted by pmtu discovery).
493 	 *
494 	 * Note that in the modern internet, where routing is unreliable
495 	 * and broken firewalls sit in every dark corner, sending random
496 	 * errors ordered by their masters, even these two messages finally
497 	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
498 	 *
499 	 * Now we are in compliance with RFCs.
500 	 *							--ANK (980905)
501 	 */
502 
503 	inet = inet_sk(sk);
504 	if (!sock_owned_by_user(sk) && inet->recverr) {
505 		sk->sk_err = err;
506 		sk->sk_error_report(sk);
507 	} else	{ /* Only an error on timeout */
508 		sk->sk_err_soft = err;
509 	}
510 
511 out:
512 	bh_unlock_sock(sk);
513 	sock_put(sk);
514 }
515 
516 /* This routine computes an IPv4 TCP checksum. */
517 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
518 {
519 	struct inet_sock *inet = inet_sk(sk);
520 	struct tcphdr *th = tcp_hdr(skb);
521 
522 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
523 		th->check = ~tcp_v4_check(len, inet->saddr,
524 					  inet->daddr, 0);
525 		skb->csum_start = skb_transport_header(skb) - skb->head;
526 		skb->csum_offset = offsetof(struct tcphdr, check);
527 	} else {
528 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
529 					 csum_partial(th,
530 						      th->doff << 2,
531 						      skb->csum));
532 	}
533 }
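
/*
 * Note on the CHECKSUM_PARTIAL branch above: only the pseudo-header sum
 * is filled in here; csum_start/csum_offset tell the NIC (or the software
 * fallback) where to fold in the final checksum over the TCP header and
 * payload later.
 */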
534 
535 int tcp_v4_gso_send_check(struct sk_buff *skb)
536 {
537 	const struct iphdr *iph;
538 	struct tcphdr *th;
539 
540 	if (!pskb_may_pull(skb, sizeof(*th)))
541 		return -EINVAL;
542 
543 	iph = ip_hdr(skb);
544 	th = tcp_hdr(skb);
545 
546 	th->check = 0;
547 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
548 	skb->csum_start = skb_transport_header(skb) - skb->head;
549 	skb->csum_offset = offsetof(struct tcphdr, check);
550 	skb->ip_summed = CHECKSUM_PARTIAL;
551 	return 0;
552 }
553 
554 /*
555  *	This routine will send an RST to the other TCP.
556  *
557  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
558  *		      for the reset?
559  *	Answer: if a packet caused the RST, it is not for a socket
560  *		existing in our system; if it is matched to a socket,
561  *		it is just a duplicate segment or a bug in the other
562  *		side's TCP.  So we build the reply based only on the
563  *		parameters that arrived with the segment.
564  *	Exception: precedence violation. We do not implement it in any case.
565  */
566 
567 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
568 {
569 	struct tcphdr *th = tcp_hdr(skb);
570 	struct {
571 		struct tcphdr th;
572 #ifdef CONFIG_TCP_MD5SIG
573 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
574 #endif
575 	} rep;
576 	struct ip_reply_arg arg;
577 #ifdef CONFIG_TCP_MD5SIG
578 	struct tcp_md5sig_key *key;
579 #endif
580 	struct net *net;
581 
582 	/* Never send a reset in response to a reset. */
583 	if (th->rst)
584 		return;
585 
586 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
587 		return;
588 
589 	/* Swap the send and the receive. */
590 	memset(&rep, 0, sizeof(rep));
591 	rep.th.dest   = th->source;
592 	rep.th.source = th->dest;
593 	rep.th.doff   = sizeof(struct tcphdr) / 4;
594 	rep.th.rst    = 1;
595 
596 	if (th->ack) {
597 		rep.th.seq = th->ack_seq;
598 	} else {
599 		rep.th.ack = 1;
600 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
601 				       skb->len - (th->doff << 2));
602 	}
603 
604 	memset(&arg, 0, sizeof(arg));
605 	arg.iov[0].iov_base = (unsigned char *)&rep;
606 	arg.iov[0].iov_len  = sizeof(rep.th);
607 
608 #ifdef CONFIG_TCP_MD5SIG
609 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
610 	if (key) {
611 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
612 				   (TCPOPT_NOP << 16) |
613 				   (TCPOPT_MD5SIG << 8) |
614 				   TCPOLEN_MD5SIG);
615 		/* Update length and the length the header thinks exists */
616 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
617 		rep.th.doff = arg.iov[0].iov_len / 4;
618 
619 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
620 				     key, ip_hdr(skb)->saddr,
621 				     ip_hdr(skb)->daddr, &rep.th);
622 	}
623 #endif
624 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
625 				      ip_hdr(skb)->saddr, /* XXX */
626 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
627 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
628 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
629 
630 	net = dev_net(skb_dst(skb)->dev);
631 	ip_send_reply(net->ipv4.tcp_sock, skb,
632 		      &arg, arg.iov[0].iov_len);
633 
634 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
635 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
636 }
637 
638 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
639    outside of socket context, is certainly ugly. What can I do?
640  */
641 
642 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
643 			    u32 win, u32 ts, int oif,
644 			    struct tcp_md5sig_key *key,
645 			    int reply_flags)
646 {
647 	struct tcphdr *th = tcp_hdr(skb);
648 	struct {
649 		struct tcphdr th;
650 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
651 #ifdef CONFIG_TCP_MD5SIG
652 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
653 #endif
654 			];
655 	} rep;
656 	struct ip_reply_arg arg;
657 	struct net *net = dev_net(skb_dst(skb)->dev);
658 
659 	memset(&rep.th, 0, sizeof(struct tcphdr));
660 	memset(&arg, 0, sizeof(arg));
661 
662 	arg.iov[0].iov_base = (unsigned char *)&rep;
663 	arg.iov[0].iov_len  = sizeof(rep.th);
664 	if (ts) {
665 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
666 				   (TCPOPT_TIMESTAMP << 8) |
667 				   TCPOLEN_TIMESTAMP);
668 		rep.opt[1] = htonl(tcp_time_stamp);
669 		rep.opt[2] = htonl(ts);
670 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
671 	}
672 
673 	/* Swap the send and the receive. */
674 	rep.th.dest    = th->source;
675 	rep.th.source  = th->dest;
676 	rep.th.doff    = arg.iov[0].iov_len / 4;
677 	rep.th.seq     = htonl(seq);
678 	rep.th.ack_seq = htonl(ack);
679 	rep.th.ack     = 1;
680 	rep.th.window  = htons(win);
681 
682 #ifdef CONFIG_TCP_MD5SIG
683 	if (key) {
684 		int offset = (ts) ? 3 : 0;
685 
686 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
687 					  (TCPOPT_NOP << 16) |
688 					  (TCPOPT_MD5SIG << 8) |
689 					  TCPOLEN_MD5SIG);
690 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
691 		rep.th.doff = arg.iov[0].iov_len/4;
692 
693 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
694 				    key, ip_hdr(skb)->saddr,
695 				    ip_hdr(skb)->daddr, &rep.th);
696 	}
697 #endif
698 	arg.flags = reply_flags;
699 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
700 				      ip_hdr(skb)->saddr, /* XXX */
701 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
702 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
703 	if (oif)
704 		arg.bound_dev_if = oif;
705 
706 	ip_send_reply(net->ipv4.tcp_sock, skb,
707 		      &arg, arg.iov[0].iov_len);
708 
709 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
710 }
711 
712 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
713 {
714 	struct inet_timewait_sock *tw = inet_twsk(sk);
715 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
716 
717 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
718 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
719 			tcptw->tw_ts_recent,
720 			tw->tw_bound_dev_if,
721 			tcp_twsk_md5_key(tcptw),
722 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
723 			);
724 
725 	inet_twsk_put(tw);
726 }
727 
728 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
729 				  struct request_sock *req)
730 {
731 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
732 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
733 			req->ts_recent,
734 			0,
735 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
736 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
737 }
738 
739 /*
740  *	Send a SYN-ACK after having received a SYN.
741  *	This still operates on a request_sock only, not on a big
742  *	socket.
743  */
744 static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
745 				struct dst_entry *dst)
746 {
747 	const struct inet_request_sock *ireq = inet_rsk(req);
748 	int err = -1;
749 	struct sk_buff * skb;
750 
751 	/* First, grab a route. */
752 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
753 		return -1;
754 
755 	skb = tcp_make_synack(sk, dst, req);
756 
757 	if (skb) {
758 		struct tcphdr *th = tcp_hdr(skb);
759 
760 		th->check = tcp_v4_check(skb->len,
761 					 ireq->loc_addr,
762 					 ireq->rmt_addr,
763 					 csum_partial(th, skb->len,
764 						      skb->csum));
765 
766 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
767 					    ireq->rmt_addr,
768 					    ireq->opt);
769 		err = net_xmit_eval(err);
770 	}
771 
772 	dst_release(dst);
773 	return err;
774 }
775 
776 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
777 {
778 	return __tcp_v4_send_synack(sk, req, NULL);
779 }
780 
781 /*
782  *	IPv4 request_sock destructor.
783  */
784 static void tcp_v4_reqsk_destructor(struct request_sock *req)
785 {
786 	kfree(inet_rsk(req)->opt);
787 }
788 
789 #ifdef CONFIG_SYN_COOKIES
790 static void syn_flood_warning(struct sk_buff *skb)
791 {
792 	static unsigned long warntime;
793 
794 	if (time_after(jiffies, (warntime + HZ * 60))) {
795 		warntime = jiffies;
796 		printk(KERN_INFO
797 		       "possible SYN flooding on port %d. Sending cookies.\n",
798 		       ntohs(tcp_hdr(skb)->dest));
799 	}
800 }
801 #endif
802 
803 /*
804  * Save and compile IPv4 options into the request_sock if needed.
805  */
806 static struct ip_options *tcp_v4_save_options(struct sock *sk,
807 					      struct sk_buff *skb)
808 {
809 	struct ip_options *opt = &(IPCB(skb)->opt);
810 	struct ip_options *dopt = NULL;
811 
812 	if (opt && opt->optlen) {
813 		int opt_size = optlength(opt);
814 		dopt = kmalloc(opt_size, GFP_ATOMIC);
815 		if (dopt) {
816 			if (ip_options_echo(dopt, skb)) {
817 				kfree(dopt);
818 				dopt = NULL;
819 			}
820 		}
821 	}
822 	return dopt;
823 }
824 
825 #ifdef CONFIG_TCP_MD5SIG
826 /*
827  * RFC2385 MD5 checksumming requires a mapping of
828  * IP address->MD5 Key.
829  * We need to maintain these in the sk structure.
830  */
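
/*
 * For reference, a minimal userspace sketch (untested, field names taken
 * from the struct tcp_md5sig uapi) of how such a key is installed with
 * setsockopt(TCP_MD5SIG):
 *
 *	struct tcp_md5sig md5 = { };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	md5.tcpm_keylen = strlen("secret");
 *	memcpy(md5.tcpm_key, "secret", md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */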
831 
832 /* Find the Key structure for an address.  */
833 static struct tcp_md5sig_key *
834 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
835 {
836 	struct tcp_sock *tp = tcp_sk(sk);
837 	int i;
838 
839 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
840 		return NULL;
841 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
842 		if (tp->md5sig_info->keys4[i].addr == addr)
843 			return &tp->md5sig_info->keys4[i].base;
844 	}
845 	return NULL;
846 }
847 
848 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
849 					 struct sock *addr_sk)
850 {
851 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
852 }
853 
854 EXPORT_SYMBOL(tcp_v4_md5_lookup);
855 
856 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
857 						      struct request_sock *req)
858 {
859 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
860 }
861 
862 /* This can be called on a newly created socket, from other files */
863 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
864 		      u8 *newkey, u8 newkeylen)
865 {
866 	/* Add Key to the list */
867 	struct tcp_md5sig_key *key;
868 	struct tcp_sock *tp = tcp_sk(sk);
869 	struct tcp4_md5sig_key *keys;
870 
871 	key = tcp_v4_md5_do_lookup(sk, addr);
872 	if (key) {
873 		/* Pre-existing entry - just update that one. */
874 		kfree(key->key);
875 		key->key = newkey;
876 		key->keylen = newkeylen;
877 	} else {
878 		struct tcp_md5sig_info *md5sig;
879 
880 		if (!tp->md5sig_info) {
881 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
882 						  GFP_ATOMIC);
883 			if (!tp->md5sig_info) {
884 				kfree(newkey);
885 				return -ENOMEM;
886 			}
887 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
888 		}
889 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
890 			kfree(newkey);
891 			return -ENOMEM;
892 		}
893 		md5sig = tp->md5sig_info;
894 
895 		if (md5sig->alloced4 == md5sig->entries4) {
896 			keys = kmalloc((sizeof(*keys) *
897 					(md5sig->entries4 + 1)), GFP_ATOMIC);
898 			if (!keys) {
899 				kfree(newkey);
900 				tcp_free_md5sig_pool();
901 				return -ENOMEM;
902 			}
903 
904 			if (md5sig->entries4)
905 				memcpy(keys, md5sig->keys4,
906 				       sizeof(*keys) * md5sig->entries4);
907 
908 			/* Free old key list, and reference new one */
909 			kfree(md5sig->keys4);
910 			md5sig->keys4 = keys;
911 			md5sig->alloced4++;
912 		}
913 		md5sig->entries4++;
914 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
915 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
916 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
917 	}
918 	return 0;
919 }
920 
921 EXPORT_SYMBOL(tcp_v4_md5_do_add);
922 
923 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
924 			       u8 *newkey, u8 newkeylen)
925 {
926 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
927 				 newkey, newkeylen);
928 }
929 
930 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
931 {
932 	struct tcp_sock *tp = tcp_sk(sk);
933 	int i;
934 
935 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
936 		if (tp->md5sig_info->keys4[i].addr == addr) {
937 			/* Free the key */
938 			kfree(tp->md5sig_info->keys4[i].base.key);
939 			tp->md5sig_info->entries4--;
940 
941 			if (tp->md5sig_info->entries4 == 0) {
942 				kfree(tp->md5sig_info->keys4);
943 				tp->md5sig_info->keys4 = NULL;
944 				tp->md5sig_info->alloced4 = 0;
945 			} else if (tp->md5sig_info->entries4 != i) {
946 				/* Shift the remaining entries down to fill the hole */
947 				memmove(&tp->md5sig_info->keys4[i],
948 					&tp->md5sig_info->keys4[i+1],
949 					(tp->md5sig_info->entries4 - i) *
950 					 sizeof(struct tcp4_md5sig_key));
951 			}
952 			tcp_free_md5sig_pool();
953 			return 0;
954 		}
955 	}
956 	return -ENOENT;
957 }
958 
959 EXPORT_SYMBOL(tcp_v4_md5_do_del);
960 
961 static void tcp_v4_clear_md5_list(struct sock *sk)
962 {
963 	struct tcp_sock *tp = tcp_sk(sk);
964 
965 	/* Free each key, then the set of keys,
966 	 * the crypto element, and then decrement our
967 	 * hold on the last resort crypto.
968 	 */
969 	if (tp->md5sig_info->entries4) {
970 		int i;
971 		for (i = 0; i < tp->md5sig_info->entries4; i++)
972 			kfree(tp->md5sig_info->keys4[i].base.key);
973 		tp->md5sig_info->entries4 = 0;
974 		tcp_free_md5sig_pool();
975 	}
976 	if (tp->md5sig_info->keys4) {
977 		kfree(tp->md5sig_info->keys4);
978 		tp->md5sig_info->keys4 = NULL;
979 		tp->md5sig_info->alloced4  = 0;
980 	}
981 }
982 
983 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
984 				 int optlen)
985 {
986 	struct tcp_md5sig cmd;
987 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
988 	u8 *newkey;
989 
990 	if (optlen < sizeof(cmd))
991 		return -EINVAL;
992 
993 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
994 		return -EFAULT;
995 
996 	if (sin->sin_family != AF_INET)
997 		return -EINVAL;
998 
999 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1000 		if (!tcp_sk(sk)->md5sig_info)
1001 			return -ENOENT;
1002 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1003 	}
1004 
1005 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1006 		return -EINVAL;
1007 
1008 	if (!tcp_sk(sk)->md5sig_info) {
1009 		struct tcp_sock *tp = tcp_sk(sk);
1010 		struct tcp_md5sig_info *p;
1011 
1012 		p = kzalloc(sizeof(*p), sk->sk_allocation);
1013 		if (!p)
1014 			return -EINVAL;
1015 
1016 		tp->md5sig_info = p;
1017 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1018 	}
1019 
1020 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1021 	if (!newkey)
1022 		return -ENOMEM;
1023 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1024 				 newkey, cmd.tcpm_keylen);
1025 }
1026 
1027 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1028 					__be32 daddr, __be32 saddr, int nbytes)
1029 {
1030 	struct tcp4_pseudohdr *bp;
1031 	struct scatterlist sg;
1032 
1033 	bp = &hp->md5_blk.ip4;
1034 
1035 	/*
1036 	 * 1. the TCP pseudo-header (in the order: source IP address,
1037 	 * destination IP address, zero-padded protocol number, and
1038 	 * segment length)
1039 	 */
1040 	bp->saddr = saddr;
1041 	bp->daddr = daddr;
1042 	bp->pad = 0;
1043 	bp->protocol = IPPROTO_TCP;
1044 	bp->len = cpu_to_be16(nbytes);
1045 
1046 	sg_init_one(&sg, bp, sizeof(*bp));
1047 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1048 }
1049 
1050 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1051 			       __be32 daddr, __be32 saddr, struct tcphdr *th)
1052 {
1053 	struct tcp_md5sig_pool *hp;
1054 	struct hash_desc *desc;
1055 
1056 	hp = tcp_get_md5sig_pool();
1057 	if (!hp)
1058 		goto clear_hash_noput;
1059 	desc = &hp->md5_desc;
1060 
1061 	if (crypto_hash_init(desc))
1062 		goto clear_hash;
1063 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1064 		goto clear_hash;
1065 	if (tcp_md5_hash_header(hp, th))
1066 		goto clear_hash;
1067 	if (tcp_md5_hash_key(hp, key))
1068 		goto clear_hash;
1069 	if (crypto_hash_final(desc, md5_hash))
1070 		goto clear_hash;
1071 
1072 	tcp_put_md5sig_pool();
1073 	return 0;
1074 
1075 clear_hash:
1076 	tcp_put_md5sig_pool();
1077 clear_hash_noput:
1078 	memset(md5_hash, 0, 16);
1079 	return 1;
1080 }
1081 
1082 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1083 			struct sock *sk, struct request_sock *req,
1084 			struct sk_buff *skb)
1085 {
1086 	struct tcp_md5sig_pool *hp;
1087 	struct hash_desc *desc;
1088 	struct tcphdr *th = tcp_hdr(skb);
1089 	__be32 saddr, daddr;
1090 
1091 	if (sk) {
1092 		saddr = inet_sk(sk)->saddr;
1093 		daddr = inet_sk(sk)->daddr;
1094 	} else if (req) {
1095 		saddr = inet_rsk(req)->loc_addr;
1096 		daddr = inet_rsk(req)->rmt_addr;
1097 	} else {
1098 		const struct iphdr *iph = ip_hdr(skb);
1099 		saddr = iph->saddr;
1100 		daddr = iph->daddr;
1101 	}
1102 
1103 	hp = tcp_get_md5sig_pool();
1104 	if (!hp)
1105 		goto clear_hash_noput;
1106 	desc = &hp->md5_desc;
1107 
1108 	if (crypto_hash_init(desc))
1109 		goto clear_hash;
1110 
1111 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1112 		goto clear_hash;
1113 	if (tcp_md5_hash_header(hp, th))
1114 		goto clear_hash;
1115 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1116 		goto clear_hash;
1117 	if (tcp_md5_hash_key(hp, key))
1118 		goto clear_hash;
1119 	if (crypto_hash_final(desc, md5_hash))
1120 		goto clear_hash;
1121 
1122 	tcp_put_md5sig_pool();
1123 	return 0;
1124 
1125 clear_hash:
1126 	tcp_put_md5sig_pool();
1127 clear_hash_noput:
1128 	memset(md5_hash, 0, 16);
1129 	return 1;
1130 }
1131 
1132 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1133 
1134 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1135 {
1136 	/*
1137 	 * This gets called for each TCP segment that arrives
1138 	 * so we want to be efficient.
1139 	 * We have 3 drop cases:
1140 	 * o No MD5 hash and one expected.
1141 	 * o MD5 hash and we're not expecting one.
1142 	 * o MD5 hash and it's wrong.
1143 	 */
1144 	__u8 *hash_location = NULL;
1145 	struct tcp_md5sig_key *hash_expected;
1146 	const struct iphdr *iph = ip_hdr(skb);
1147 	struct tcphdr *th = tcp_hdr(skb);
1148 	int genhash;
1149 	unsigned char newhash[16];
1150 
1151 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1152 	hash_location = tcp_parse_md5sig_option(th);
1153 
1154 	/* We've parsed the options - do we have a hash? */
1155 	if (!hash_expected && !hash_location)
1156 		return 0;
1157 
1158 	if (hash_expected && !hash_location) {
1159 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1160 		return 1;
1161 	}
1162 
1163 	if (!hash_expected && hash_location) {
1164 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1165 		return 1;
1166 	}
1167 
1168 	/* Okay, so we have both hash_expected and hash_location -
1169 	 * so we need to calculate the hash and compare.
1170 	 */
1171 	genhash = tcp_v4_md5_hash_skb(newhash,
1172 				      hash_expected,
1173 				      NULL, NULL, skb);
1174 
1175 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1176 		if (net_ratelimit()) {
1177 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1178 			       &iph->saddr, ntohs(th->source),
1179 			       &iph->daddr, ntohs(th->dest),
1180 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1181 		}
1182 		return 1;
1183 	}
1184 	return 0;
1185 }
1186 
1187 #endif
1188 
1189 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1190 	.family		=	PF_INET,
1191 	.obj_size	=	sizeof(struct tcp_request_sock),
1192 	.rtx_syn_ack	=	tcp_v4_send_synack,
1193 	.send_ack	=	tcp_v4_reqsk_send_ack,
1194 	.destructor	=	tcp_v4_reqsk_destructor,
1195 	.send_reset	=	tcp_v4_send_reset,
1196 };
1197 
1198 #ifdef CONFIG_TCP_MD5SIG
1199 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1200 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1201 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1202 };
1203 #endif
1204 
1205 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1206 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1207 	.twsk_unique	= tcp_twsk_unique,
1208 	.twsk_destructor= tcp_twsk_destructor,
1209 };
1210 
1211 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1212 {
1213 	struct inet_request_sock *ireq;
1214 	struct tcp_options_received tmp_opt;
1215 	struct request_sock *req;
1216 	__be32 saddr = ip_hdr(skb)->saddr;
1217 	__be32 daddr = ip_hdr(skb)->daddr;
1218 	__u32 isn = TCP_SKB_CB(skb)->when;
1219 	struct dst_entry *dst = NULL;
1220 #ifdef CONFIG_SYN_COOKIES
1221 	int want_cookie = 0;
1222 #else
1223 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1224 #endif
1225 
1226 	/* Never answer SYNs sent to broadcast or multicast */
1227 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1228 		goto drop;
1229 
1230 	/* TW buckets are converted to open requests without
1231 	 * limitation; they conserve resources and the peer is
1232 	 * evidently a real one.
1233 	 */
1234 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1235 #ifdef CONFIG_SYN_COOKIES
1236 		if (sysctl_tcp_syncookies) {
1237 			want_cookie = 1;
1238 		} else
1239 #endif
1240 		goto drop;
1241 	}
1242 
1243 	/* Accept backlog is full. If we have already queued enough
1244 	 * warm entries in the SYN queue, drop the request. It is better than
1245 	 * clogging the SYN queue with openreqs with exponentially increasing
1246 	 * timeouts.
1247 	 */
1248 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1249 		goto drop;
1250 
1251 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1252 	if (!req)
1253 		goto drop;
1254 
1255 #ifdef CONFIG_TCP_MD5SIG
1256 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1257 #endif
1258 
1259 	tcp_clear_options(&tmp_opt);
1260 	tmp_opt.mss_clamp = 536;
1261 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1262 
1263 	tcp_parse_options(skb, &tmp_opt, 0);
1264 
1265 	if (want_cookie && !tmp_opt.saw_tstamp)
1266 		tcp_clear_options(&tmp_opt);
1267 
1268 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1269 
1270 	tcp_openreq_init(req, &tmp_opt, skb);
1271 
1272 	ireq = inet_rsk(req);
1273 	ireq->loc_addr = daddr;
1274 	ireq->rmt_addr = saddr;
1275 	ireq->no_srccheck = inet_sk(sk)->transparent;
1276 	ireq->opt = tcp_v4_save_options(sk, skb);
1277 
1278 	if (security_inet_conn_request(sk, skb, req))
1279 		goto drop_and_free;
1280 
1281 	if (!want_cookie)
1282 		TCP_ECN_create_request(req, tcp_hdr(skb));
1283 
1284 	if (want_cookie) {
1285 #ifdef CONFIG_SYN_COOKIES
1286 		syn_flood_warning(skb);
1287 		req->cookie_ts = tmp_opt.tstamp_ok;
1288 #endif
1289 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1290 	} else if (!isn) {
1291 		struct inet_peer *peer = NULL;
1292 
1293 		/* VJ's idea. We save the last timestamp seen
1294 		 * from the destination in the peer table when entering
1295 		 * state TIME-WAIT, and check against it before
1296 		 * accepting a new connection request.
1297 		 *
1298 		 * If "isn" is not zero, this request hit an alive
1299 		 * timewait bucket, so all the necessary checks
1300 		 * are made in the function processing the timewait state.
1301 		 */
1302 		if (tmp_opt.saw_tstamp &&
1303 		    tcp_death_row.sysctl_tw_recycle &&
1304 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1305 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1306 		    peer->v4daddr == saddr) {
1307 			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1308 			    (s32)(peer->tcp_ts - req->ts_recent) >
1309 							TCP_PAWS_WINDOW) {
1310 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1311 				goto drop_and_release;
1312 			}
1313 		}
1314 		/* Kill the following clause, if you dislike this way. */
1315 		else if (!sysctl_tcp_syncookies &&
1316 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1317 			  (sysctl_max_syn_backlog >> 2)) &&
1318 			 (!peer || !peer->tcp_ts_stamp) &&
1319 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1320 			/* Without syncookies, the last quarter of the
1321 			 * backlog is reserved for destinations proven
1322 			 * to be alive.
1323 			 * It means that we continue to communicate with
1324 			 * destinations already remembered by the moment
1325 			 * of the synflood.
1326 			 */
1327 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1328 				       &saddr, ntohs(tcp_hdr(skb)->source));
1329 			goto drop_and_release;
1330 		}
1331 
1332 		isn = tcp_v4_init_sequence(skb);
1333 	}
1334 	tcp_rsk(req)->snt_isn = isn;
1335 
1336 	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1337 		goto drop_and_free;
1338 
1339 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1340 	return 0;
1341 
1342 drop_and_release:
1343 	dst_release(dst);
1344 drop_and_free:
1345 	reqsk_free(req);
1346 drop:
1347 	return 0;
1348 }
1349 
1350 
1351 /*
1352  * The three-way handshake has completed - we got a valid synack -
1353  * now create the new socket.
1354  */
1355 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1356 				  struct request_sock *req,
1357 				  struct dst_entry *dst)
1358 {
1359 	struct inet_request_sock *ireq;
1360 	struct inet_sock *newinet;
1361 	struct tcp_sock *newtp;
1362 	struct sock *newsk;
1363 #ifdef CONFIG_TCP_MD5SIG
1364 	struct tcp_md5sig_key *key;
1365 #endif
1366 
1367 	if (sk_acceptq_is_full(sk))
1368 		goto exit_overflow;
1369 
1370 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1371 		goto exit;
1372 
1373 	newsk = tcp_create_openreq_child(sk, req, skb);
1374 	if (!newsk)
1375 		goto exit;
1376 
1377 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1378 	sk_setup_caps(newsk, dst);
1379 
1380 	newtp		      = tcp_sk(newsk);
1381 	newinet		      = inet_sk(newsk);
1382 	ireq		      = inet_rsk(req);
1383 	newinet->daddr	      = ireq->rmt_addr;
1384 	newinet->rcv_saddr    = ireq->loc_addr;
1385 	newinet->saddr	      = ireq->loc_addr;
1386 	newinet->opt	      = ireq->opt;
1387 	ireq->opt	      = NULL;
1388 	newinet->mc_index     = inet_iif(skb);
1389 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1390 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1391 	if (newinet->opt)
1392 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1393 	newinet->id = newtp->write_seq ^ jiffies;
1394 
1395 	tcp_mtup_init(newsk);
1396 	tcp_sync_mss(newsk, dst_mtu(dst));
1397 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1398 	if (tcp_sk(sk)->rx_opt.user_mss &&
1399 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1400 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1401 
1402 	tcp_initialize_rcv_mss(newsk);
1403 
1404 #ifdef CONFIG_TCP_MD5SIG
1405 	/* Copy over the MD5 key from the original socket */
1406 	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1407 		/*
1408 		 * We're using one, so create a matching key
1409 		 * on the newsk structure. If we fail to get
1410 		 * memory, then we end up not copying the key
1411 		 * across. Shucks.
1412 		 */
1413 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1414 		if (newkey != NULL)
1415 			tcp_v4_md5_do_add(newsk, newinet->daddr,
1416 					  newkey, key->keylen);
1417 		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1418 	}
1419 #endif
1420 
1421 	__inet_hash_nolisten(newsk);
1422 	__inet_inherit_port(sk, newsk);
1423 
1424 	return newsk;
1425 
1426 exit_overflow:
1427 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1428 exit:
1429 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1430 	dst_release(dst);
1431 	return NULL;
1432 }
1433 
1434 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1435 {
1436 	struct tcphdr *th = tcp_hdr(skb);
1437 	const struct iphdr *iph = ip_hdr(skb);
1438 	struct sock *nsk;
1439 	struct request_sock **prev;
1440 	/* Find possible connection requests. */
1441 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1442 						       iph->saddr, iph->daddr);
1443 	if (req)
1444 		return tcp_check_req(sk, skb, req, prev);
1445 
1446 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1447 			th->source, iph->daddr, th->dest, inet_iif(skb));
1448 
1449 	if (nsk) {
1450 		if (nsk->sk_state != TCP_TIME_WAIT) {
1451 			bh_lock_sock(nsk);
1452 			return nsk;
1453 		}
1454 		inet_twsk_put(inet_twsk(nsk));
1455 		return NULL;
1456 	}
1457 
1458 #ifdef CONFIG_SYN_COOKIES
1459 	if (!th->rst && !th->syn && th->ack)
1460 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1461 #endif
1462 	return sk;
1463 }
1464 
1465 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1466 {
1467 	const struct iphdr *iph = ip_hdr(skb);
1468 
1469 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1470 		if (!tcp_v4_check(skb->len, iph->saddr,
1471 				  iph->daddr, skb->csum)) {
1472 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1473 			return 0;
1474 		}
1475 	}
1476 
1477 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1478 				       skb->len, IPPROTO_TCP, 0);
1479 
1480 	if (skb->len <= 76) {
1481 		return __skb_checksum_complete(skb);
1482 	}
1483 	return 0;
1484 }
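
/*
 * Note on the threshold above: very short segments (<= 76 bytes) are cheap
 * enough to verify immediately with __skb_checksum_complete(); for longer
 * ones only the pseudo-header sum is stored here and full verification is
 * deferred (e.g. until the data is copied to user space or otherwise
 * consumed).
 */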
1485 
1486 
1487 /* The socket must have its spinlock held when we get
1488  * here.
1489  *
1490  * We have a potential double-lock case here, so even when
1491  * doing backlog processing we use the BH locking scheme.
1492  * This is because we cannot sleep with the original spinlock
1493  * held.
1494  */
1495 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1496 {
1497 	struct sock *rsk;
1498 #ifdef CONFIG_TCP_MD5SIG
1499 	/*
1500 	 * We really want to reject the packet as early as possible
1501 	 * if:
1502 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1503 	 *  o There is an MD5 option and we're not expecting one
1504 	 */
1505 	if (tcp_v4_inbound_md5_hash(sk, skb))
1506 		goto discard;
1507 #endif
1508 
1509 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1510 		TCP_CHECK_TIMER(sk);
1511 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1512 			rsk = sk;
1513 			goto reset;
1514 		}
1515 		TCP_CHECK_TIMER(sk);
1516 		return 0;
1517 	}
1518 
1519 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1520 		goto csum_err;
1521 
1522 	if (sk->sk_state == TCP_LISTEN) {
1523 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1524 		if (!nsk)
1525 			goto discard;
1526 
1527 		if (nsk != sk) {
1528 			if (tcp_child_process(sk, nsk, skb)) {
1529 				rsk = nsk;
1530 				goto reset;
1531 			}
1532 			return 0;
1533 		}
1534 	}
1535 
1536 	TCP_CHECK_TIMER(sk);
1537 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1538 		rsk = sk;
1539 		goto reset;
1540 	}
1541 	TCP_CHECK_TIMER(sk);
1542 	return 0;
1543 
1544 reset:
1545 	tcp_v4_send_reset(rsk, skb);
1546 discard:
1547 	kfree_skb(skb);
1548 	/* Be careful here. If this function gets more complicated and
1549 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1550 	 * might be destroyed here. This current version compiles correctly,
1551 	 * but you have been warned.
1552 	 */
1553 	return 0;
1554 
1555 csum_err:
1556 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1557 	goto discard;
1558 }
1559 
1560 /*
1561  *	From tcp_input.c
1562  */
1563 
1564 int tcp_v4_rcv(struct sk_buff *skb)
1565 {
1566 	const struct iphdr *iph;
1567 	struct tcphdr *th;
1568 	struct sock *sk;
1569 	int ret;
1570 	struct net *net = dev_net(skb->dev);
1571 
1572 	if (skb->pkt_type != PACKET_HOST)
1573 		goto discard_it;
1574 
1575 	/* Count it even if it's bad */
1576 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1577 
1578 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1579 		goto discard_it;
1580 
1581 	th = tcp_hdr(skb);
1582 
1583 	if (th->doff < sizeof(struct tcphdr) / 4)
1584 		goto bad_packet;
1585 	if (!pskb_may_pull(skb, th->doff * 4))
1586 		goto discard_it;
1587 
1588 	/* An explanation is required here, I think.
1589 	 * Packet length and doff are validated by header prediction,
1590 	 * provided the case of th->doff==0 is eliminated.
1591 	 * So, we defer the checks. */
1592 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1593 		goto bad_packet;
1594 
1595 	th = tcp_hdr(skb);
1596 	iph = ip_hdr(skb);
1597 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1598 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1599 				    skb->len - th->doff * 4);
1600 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1601 	TCP_SKB_CB(skb)->when	 = 0;
1602 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1603 	TCP_SKB_CB(skb)->sacked	 = 0;
1604 
1605 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1606 	if (!sk)
1607 		goto no_tcp_socket;
1608 
1609 process:
1610 	if (sk->sk_state == TCP_TIME_WAIT)
1611 		goto do_time_wait;
1612 
1613 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1614 		goto discard_and_relse;
1615 	nf_reset(skb);
1616 
1617 	if (sk_filter(sk, skb))
1618 		goto discard_and_relse;
1619 
1620 	skb->dev = NULL;
1621 
1622 	bh_lock_sock_nested(sk);
1623 	ret = 0;
1624 	if (!sock_owned_by_user(sk)) {
1625 #ifdef CONFIG_NET_DMA
1626 		struct tcp_sock *tp = tcp_sk(sk);
1627 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1628 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1629 		if (tp->ucopy.dma_chan)
1630 			ret = tcp_v4_do_rcv(sk, skb);
1631 		else
1632 #endif
1633 		{
1634 			if (!tcp_prequeue(sk, skb))
1635 				ret = tcp_v4_do_rcv(sk, skb);
1636 		}
1637 	} else
1638 		sk_add_backlog(sk, skb);
1639 	bh_unlock_sock(sk);
1640 
1641 	sock_put(sk);
1642 
1643 	return ret;
1644 
1645 no_tcp_socket:
1646 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1647 		goto discard_it;
1648 
1649 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1650 bad_packet:
1651 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1652 	} else {
1653 		tcp_v4_send_reset(NULL, skb);
1654 	}
1655 
1656 discard_it:
1657 	/* Discard frame. */
1658 	kfree_skb(skb);
1659 	return 0;
1660 
1661 discard_and_relse:
1662 	sock_put(sk);
1663 	goto discard_it;
1664 
1665 do_time_wait:
1666 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1667 		inet_twsk_put(inet_twsk(sk));
1668 		goto discard_it;
1669 	}
1670 
1671 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1672 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1673 		inet_twsk_put(inet_twsk(sk));
1674 		goto discard_it;
1675 	}
1676 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1677 	case TCP_TW_SYN: {
1678 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1679 							&tcp_hashinfo,
1680 							iph->daddr, th->dest,
1681 							inet_iif(skb));
1682 		if (sk2) {
1683 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1684 			inet_twsk_put(inet_twsk(sk));
1685 			sk = sk2;
1686 			goto process;
1687 		}
1688 		/* Fall through to ACK */
1689 	}
1690 	case TCP_TW_ACK:
1691 		tcp_v4_timewait_ack(sk, skb);
1692 		break;
1693 	case TCP_TW_RST:
1694 		goto no_tcp_socket;
1695 	case TCP_TW_SUCCESS:;
1696 	}
1697 	goto discard_it;
1698 }
1699 
1700 /* VJ's idea. Save the last timestamp seen from this destination
1701  * and hold it at least for the normal timewait interval to use for duplicate
1702  * segment detection in subsequent connections, before they enter synchronized
1703  * state.
1704  */
1705 
1706 int tcp_v4_remember_stamp(struct sock *sk)
1707 {
1708 	struct inet_sock *inet = inet_sk(sk);
1709 	struct tcp_sock *tp = tcp_sk(sk);
1710 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1711 	struct inet_peer *peer = NULL;
1712 	int release_it = 0;
1713 
1714 	if (!rt || rt->rt_dst != inet->daddr) {
1715 		peer = inet_getpeer(inet->daddr, 1);
1716 		release_it = 1;
1717 	} else {
1718 		if (!rt->peer)
1719 			rt_bind_peer(rt, 1);
1720 		peer = rt->peer;
1721 	}
1722 
1723 	if (peer) {
1724 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1725 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1726 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1727 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1728 			peer->tcp_ts = tp->rx_opt.ts_recent;
1729 		}
1730 		if (release_it)
1731 			inet_putpeer(peer);
1732 		return 1;
1733 	}
1734 
1735 	return 0;
1736 }
1737 
1738 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1739 {
1740 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1741 
1742 	if (peer) {
1743 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1744 
1745 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1746 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1747 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1748 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1749 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1750 		}
1751 		inet_putpeer(peer);
1752 		return 1;
1753 	}
1754 
1755 	return 0;
1756 }
1757 
1758 const struct inet_connection_sock_af_ops ipv4_specific = {
1759 	.queue_xmit	   = ip_queue_xmit,
1760 	.send_check	   = tcp_v4_send_check,
1761 	.rebuild_header	   = inet_sk_rebuild_header,
1762 	.conn_request	   = tcp_v4_conn_request,
1763 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1764 	.remember_stamp	   = tcp_v4_remember_stamp,
1765 	.net_header_len	   = sizeof(struct iphdr),
1766 	.setsockopt	   = ip_setsockopt,
1767 	.getsockopt	   = ip_getsockopt,
1768 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1769 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1770 	.bind_conflict	   = inet_csk_bind_conflict,
1771 #ifdef CONFIG_COMPAT
1772 	.compat_setsockopt = compat_ip_setsockopt,
1773 	.compat_getsockopt = compat_ip_getsockopt,
1774 #endif
1775 };
1776 
1777 #ifdef CONFIG_TCP_MD5SIG
1778 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1779 	.md5_lookup		= tcp_v4_md5_lookup,
1780 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1781 	.md5_add		= tcp_v4_md5_add_func,
1782 	.md5_parse		= tcp_v4_parse_md5_keys,
1783 };
1784 #endif
1785 
1786 /* NOTE: A lot of things are set to zero explicitly by the call to
1787  *       sk_alloc(), so they need not be done here.
1788  */
1789 static int tcp_v4_init_sock(struct sock *sk)
1790 {
1791 	struct inet_connection_sock *icsk = inet_csk(sk);
1792 	struct tcp_sock *tp = tcp_sk(sk);
1793 
1794 	skb_queue_head_init(&tp->out_of_order_queue);
1795 	tcp_init_xmit_timers(sk);
1796 	tcp_prequeue_init(tp);
1797 
1798 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1799 	tp->mdev = TCP_TIMEOUT_INIT;
1800 
1801 	/* So many TCP implementations out there (incorrectly) count the
1802 	 * initial SYN frame in their delayed-ACK and congestion control
1803 	 * algorithms that we must have the following bandaid to talk
1804 	 * efficiently to them.  -DaveM
1805 	 */
1806 	tp->snd_cwnd = 2;
1807 
1808 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1809 	 * initialization of these values.
1810 	 */
1811 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1812 	tp->snd_cwnd_clamp = ~0;
1813 	tp->mss_cache = 536;
1814 
1815 	tp->reordering = sysctl_tcp_reordering;
1816 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1817 
1818 	sk->sk_state = TCP_CLOSE;
1819 
1820 	sk->sk_write_space = sk_stream_write_space;
1821 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1822 
1823 	icsk->icsk_af_ops = &ipv4_specific;
1824 	icsk->icsk_sync_mss = tcp_sync_mss;
1825 #ifdef CONFIG_TCP_MD5SIG
1826 	tp->af_specific = &tcp_sock_ipv4_specific;
1827 #endif
1828 
1829 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1830 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1831 
1832 	local_bh_disable();
1833 	percpu_counter_inc(&tcp_sockets_allocated);
1834 	local_bh_enable();
1835 
1836 	return 0;
1837 }
1838 
1839 void tcp_v4_destroy_sock(struct sock *sk)
1840 {
1841 	struct tcp_sock *tp = tcp_sk(sk);
1842 
1843 	tcp_clear_xmit_timers(sk);
1844 
1845 	tcp_cleanup_congestion_control(sk);
1846 
1847 	/* Clean up the write buffer. */
1848 	tcp_write_queue_purge(sk);
1849 
1850 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1851 	__skb_queue_purge(&tp->out_of_order_queue);
1852 
1853 #ifdef CONFIG_TCP_MD5SIG
1854 	/* Clean up the MD5 key list, if any */
1855 	if (tp->md5sig_info) {
1856 		tcp_v4_clear_md5_list(sk);
1857 		kfree(tp->md5sig_info);
1858 		tp->md5sig_info = NULL;
1859 	}
1860 #endif
1861 
1862 #ifdef CONFIG_NET_DMA
1863 	/* Cleans up our sk_async_wait_queue */
1864 	__skb_queue_purge(&sk->sk_async_wait_queue);
1865 #endif
1866 
1867 	/* Clean the prequeue; it should really be empty */
1868 	__skb_queue_purge(&tp->ucopy.prequeue);
1869 
1870 	/* Clean up a referenced TCP bind bucket. */
1871 	if (inet_csk(sk)->icsk_bind_hash)
1872 		inet_put_port(sk);
1873 
1874 	/*
1875 	 * If sendmsg cached page exists, toss it.
1876 	 */
1877 	if (sk->sk_sndmsg_page) {
1878 		__free_page(sk->sk_sndmsg_page);
1879 		sk->sk_sndmsg_page = NULL;
1880 	}
1881 
1882 	percpu_counter_dec(&tcp_sockets_allocated);
1883 }
1884 
1885 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1886 
1887 #ifdef CONFIG_PROC_FS
1888 /* Proc filesystem TCP sock list dumping. */
1889 
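/* Helpers for walking a bucket's time-wait chain, which is terminated by a
 * "nulls" marker rather than a NULL pointer.
 */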
1890 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1891 {
1892 	return hlist_nulls_empty(head) ? NULL :
1893 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1894 }
1895 
1896 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1897 {
1898 	return !is_a_nulls(tw->tw_node.next) ?
1899 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1900 }
1901 
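/* Advance the listening-table iterator: walk every listening socket in the
 * current bucket and, for each one, its table of pending open requests
 * (SYN_RECV), before moving on to the next bucket.
 */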
1902 static void *listening_get_next(struct seq_file *seq, void *cur)
1903 {
1904 	struct inet_connection_sock *icsk;
1905 	struct hlist_nulls_node *node;
1906 	struct sock *sk = cur;
1907 	struct inet_listen_hashbucket *ilb;
1908 	struct tcp_iter_state *st = seq->private;
1909 	struct net *net = seq_file_net(seq);
1910 
1911 	if (!sk) {
1912 		st->bucket = 0;
1913 		ilb = &tcp_hashinfo.listening_hash[0];
1914 		spin_lock_bh(&ilb->lock);
1915 		sk = sk_nulls_head(&ilb->head);
1916 		goto get_sk;
1917 	}
1918 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1919 	++st->num;
1920 
1921 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1922 		struct request_sock *req = cur;
1923 
1924 		icsk = inet_csk(st->syn_wait_sk);
1925 		req = req->dl_next;
1926 		while (1) {
1927 			while (req) {
1928 				if (req->rsk_ops->family == st->family) {
1929 					cur = req;
1930 					goto out;
1931 				}
1932 				req = req->dl_next;
1933 			}
1934 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1935 				break;
1936 get_req:
1937 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1938 		}
1939 		sk	  = sk_next(st->syn_wait_sk);
1940 		st->state = TCP_SEQ_STATE_LISTENING;
1941 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1942 	} else {
1943 		icsk = inet_csk(sk);
1944 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1945 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1946 			goto start_req;
1947 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1948 		sk = sk_next(sk);
1949 	}
1950 get_sk:
1951 	sk_nulls_for_each_from(sk, node) {
1952 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
1953 			cur = sk;
1954 			goto out;
1955 		}
1956 		icsk = inet_csk(sk);
1957 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1958 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1959 start_req:
1960 			st->uid		= sock_i_uid(sk);
1961 			st->syn_wait_sk = sk;
1962 			st->state	= TCP_SEQ_STATE_OPENREQ;
1963 			st->sbucket	= 0;
1964 			goto get_req;
1965 		}
1966 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1967 	}
1968 	spin_unlock_bh(&ilb->lock);
1969 	if (++st->bucket < INET_LHTABLE_SIZE) {
1970 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1971 		spin_lock_bh(&ilb->lock);
1972 		sk = sk_nulls_head(&ilb->head);
1973 		goto get_sk;
1974 	}
1975 	cur = NULL;
1976 out:
1977 	return cur;
1978 }
1979 
1980 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1981 {
1982 	void *rc = listening_get_next(seq, NULL);
1983 
1984 	while (rc && *pos) {
1985 		rc = listening_get_next(seq, rc);
1986 		--*pos;
1987 	}
1988 	return rc;
1989 }
1990 
1991 static inline int empty_bucket(struct tcp_iter_state *st)
1992 {
1993 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
1994 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
1995 }
1996 
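/* Find the first socket in the ehash that matches the iterator's family and
 * netns, searching each bucket's established chain before its TIME_WAIT
 * chain.  When a match is returned, the bucket lock is left held.
 */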
1997 static void *established_get_first(struct seq_file *seq)
1998 {
1999 	struct tcp_iter_state *st = seq->private;
2000 	struct net *net = seq_file_net(seq);
2001 	void *rc = NULL;
2002 
2003 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2004 		struct sock *sk;
2005 		struct hlist_nulls_node *node;
2006 		struct inet_timewait_sock *tw;
2007 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2008 
2009 		/* Lockless fast path for the common case of empty buckets */
2010 		if (empty_bucket(st))
2011 			continue;
2012 
2013 		spin_lock_bh(lock);
2014 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2015 			if (sk->sk_family != st->family ||
2016 			    !net_eq(sock_net(sk), net)) {
2017 				continue;
2018 			}
2019 			rc = sk;
2020 			goto out;
2021 		}
2022 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2023 		inet_twsk_for_each(tw, node,
2024 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2025 			if (tw->tw_family != st->family ||
2026 			    !net_eq(twsk_net(tw), net)) {
2027 				continue;
2028 			}
2029 			rc = tw;
2030 			goto out;
2031 		}
2032 		spin_unlock_bh(lock);
2033 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2034 	}
2035 out:
2036 	return rc;
2037 }
2038 
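/* Advance within the current bucket (established chain, then TIME_WAIT
 * chain) and, once it is exhausted, move on to the next non-empty bucket.
 */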
2039 static void *established_get_next(struct seq_file *seq, void *cur)
2040 {
2041 	struct sock *sk = cur;
2042 	struct inet_timewait_sock *tw;
2043 	struct hlist_nulls_node *node;
2044 	struct tcp_iter_state *st = seq->private;
2045 	struct net *net = seq_file_net(seq);
2046 
2047 	++st->num;
2048 
2049 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2050 		tw = cur;
2051 		tw = tw_next(tw);
2052 get_tw:
2053 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2054 			tw = tw_next(tw);
2055 		}
2056 		if (tw) {
2057 			cur = tw;
2058 			goto out;
2059 		}
2060 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2061 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2062 
2063 		/* Look for the next non-empty bucket. */
2064 		while (++st->bucket < tcp_hashinfo.ehash_size &&
2065 				empty_bucket(st))
2066 			;
2067 		if (st->bucket >= tcp_hashinfo.ehash_size)
2068 			return NULL;
2069 
2070 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2071 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2072 	} else
2073 		sk = sk_nulls_next(sk);
2074 
2075 	sk_nulls_for_each_from(sk, node) {
2076 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2077 			goto found;
2078 	}
2079 
2080 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2081 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2082 	goto get_tw;
2083 found:
2084 	cur = sk;
2085 out:
2086 	return cur;
2087 }
2088 
2089 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2090 {
2091 	void *rc = established_get_first(seq);
2092 
2093 	while (rc && pos) {
2094 		rc = established_get_next(seq, rc);
2095 		--pos;
2096 	}
2097 	return rc;
2098 }
2099 
2100 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2101 {
2102 	void *rc;
2103 	struct tcp_iter_state *st = seq->private;
2104 
2105 	st->state = TCP_SEQ_STATE_LISTENING;
2106 	rc	  = listening_get_idx(seq, &pos);
2107 
2108 	if (!rc) {
2109 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2110 		rc	  = established_get_idx(seq, pos);
2111 	}
2112 
2113 	return rc;
2114 }
2115 
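/* seq_file callbacks: *pos indexes entries across both the listening and
 * established tables, and SEQ_START_TOKEN produces the header line.
 */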
2116 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2117 {
2118 	struct tcp_iter_state *st = seq->private;
2119 	st->state = TCP_SEQ_STATE_LISTENING;
2120 	st->num = 0;
2121 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2122 }
2123 
2124 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2125 {
2126 	void *rc = NULL;
2127 	struct tcp_iter_state *st;
2128 
2129 	if (v == SEQ_START_TOKEN) {
2130 		rc = tcp_get_idx(seq, 0);
2131 		goto out;
2132 	}
2133 	st = seq->private;
2134 
2135 	switch (st->state) {
2136 	case TCP_SEQ_STATE_OPENREQ:
2137 	case TCP_SEQ_STATE_LISTENING:
2138 		rc = listening_get_next(seq, v);
2139 		if (!rc) {
2140 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2141 			rc	  = established_get_first(seq);
2142 		}
2143 		break;
2144 	case TCP_SEQ_STATE_ESTABLISHED:
2145 	case TCP_SEQ_STATE_TIME_WAIT:
2146 		rc = established_get_next(seq, v);
2147 		break;
2148 	}
2149 out:
2150 	++*pos;
2151 	return rc;
2152 }
2153 
2154 static void tcp_seq_stop(struct seq_file *seq, void *v)
2155 {
2156 	struct tcp_iter_state *st = seq->private;
2157 
2158 	switch (st->state) {
2159 	case TCP_SEQ_STATE_OPENREQ:
2160 		if (v) {
2161 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2162 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2163 		}
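		/* fall through: release the listening bucket lock as well */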
2164 	case TCP_SEQ_STATE_LISTENING:
2165 		if (v != SEQ_START_TOKEN)
2166 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2167 		break;
2168 	case TCP_SEQ_STATE_TIME_WAIT:
2169 	case TCP_SEQ_STATE_ESTABLISHED:
2170 		if (v)
2171 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2172 		break;
2173 	}
2174 }
2175 
2176 static int tcp_seq_open(struct inode *inode, struct file *file)
2177 {
2178 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2179 	struct tcp_iter_state *s;
2180 	int err;
2181 
2182 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2183 			  sizeof(struct tcp_iter_state));
2184 	if (err < 0)
2185 		return err;
2186 
2187 	s = ((struct seq_file *)file->private_data)->private;
2188 	s->family		= afinfo->family;
2189 	return 0;
2190 }
2191 
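/* Fill in the common seq_file operations and create the per-namespace
 * /proc/net entry for this address family.
 */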
2192 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2193 {
2194 	int rc = 0;
2195 	struct proc_dir_entry *p;
2196 
2197 	afinfo->seq_fops.open		= tcp_seq_open;
2198 	afinfo->seq_fops.read		= seq_read;
2199 	afinfo->seq_fops.llseek		= seq_lseek;
2200 	afinfo->seq_fops.release	= seq_release_net;
2201 
2202 	afinfo->seq_ops.start		= tcp_seq_start;
2203 	afinfo->seq_ops.next		= tcp_seq_next;
2204 	afinfo->seq_ops.stop		= tcp_seq_stop;
2205 
2206 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2207 			     &afinfo->seq_fops, afinfo);
2208 	if (!p)
2209 		rc = -ENOMEM;
2210 	return rc;
2211 }
2212 
2213 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2214 {
2215 	proc_net_remove(net, afinfo->name);
2216 }
2217 
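/* Format one /proc/net/tcp line for an embryonic (SYN_RECV) open request. */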
2218 static void get_openreq4(struct sock *sk, struct request_sock *req,
2219 			 struct seq_file *f, int i, int uid, int *len)
2220 {
2221 	const struct inet_request_sock *ireq = inet_rsk(req);
2222 	int ttd = req->expires - jiffies;
2223 
2224 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2225 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2226 		i,
2227 		ireq->loc_addr,
2228 		ntohs(inet_sk(sk)->sport),
2229 		ireq->rmt_addr,
2230 		ntohs(ireq->rmt_port),
2231 		TCP_SYN_RECV,
2232 		0, 0, /* could print option size, but that is af dependent. */
2233 		1,    /* timers active (only the expire timer) */
2234 		jiffies_to_clock_t(ttd),
2235 		req->retrans,
2236 		uid,
2237 		0,  /* non-standard timer */
2238 		0, /* open_requests have no inode */
2239 		atomic_read(&sk->sk_refcnt),
2240 		req,
2241 		len);
2242 }
2243 
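/* Format one /proc/net/tcp line for a full socket. */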
2244 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2245 {
2246 	int timer_active;
2247 	unsigned long timer_expires;
2248 	struct tcp_sock *tp = tcp_sk(sk);
2249 	const struct inet_connection_sock *icsk = inet_csk(sk);
2250 	struct inet_sock *inet = inet_sk(sk);
2251 	__be32 dest = inet->daddr;
2252 	__be32 src = inet->rcv_saddr;
2253 	__u16 destp = ntohs(inet->dport);
2254 	__u16 srcp = ntohs(inet->sport);
2255 
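	/* The "tr" field of /proc/net/tcp: 1 retransmit timer, 2 another timer
	 * (e.g. keepalive), 4 zero-window probe, 0 none.  The value 3 is
	 * reserved for TIME_WAIT sockets (see get_timewait4_sock()).
	 */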
2256 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2257 		timer_active	= 1;
2258 		timer_expires	= icsk->icsk_timeout;
2259 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2260 		timer_active	= 4;
2261 		timer_expires	= icsk->icsk_timeout;
2262 	} else if (timer_pending(&sk->sk_timer)) {
2263 		timer_active	= 2;
2264 		timer_expires	= sk->sk_timer.expires;
2265 	} else {
2266 		timer_active	= 0;
2267 		timer_expires	= jiffies;
2268 	}
2269 
2270 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2271 			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2272 		i, src, srcp, dest, destp, sk->sk_state,
2273 		tp->write_seq - tp->snd_una,
2274 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2275 					     (tp->rcv_nxt - tp->copied_seq),
2276 		timer_active,
2277 		jiffies_to_clock_t(timer_expires - jiffies),
2278 		icsk->icsk_retransmits,
2279 		sock_i_uid(sk),
2280 		icsk->icsk_probes_out,
2281 		sock_i_ino(sk),
2282 		atomic_read(&sk->sk_refcnt), sk,
2283 		jiffies_to_clock_t(icsk->icsk_rto),
2284 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2285 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2286 		tp->snd_cwnd,
2287 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2288 		len);
2289 }
2290 
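/* Format one /proc/net/tcp line for a TIME_WAIT socket. */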
2291 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2292 			       struct seq_file *f, int i, int *len)
2293 {
2294 	__be32 dest, src;
2295 	__u16 destp, srcp;
2296 	int ttd = tw->tw_ttd - jiffies;
2297 
2298 	if (ttd < 0)
2299 		ttd = 0;
2300 
2301 	dest  = tw->tw_daddr;
2302 	src   = tw->tw_rcv_saddr;
2303 	destp = ntohs(tw->tw_dport);
2304 	srcp  = ntohs(tw->tw_sport);
2305 
2306 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2307 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2308 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2309 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2310 		atomic_read(&tw->tw_refcnt), tw, len);
2311 }
2312 
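/* Fixed width of one formatted /proc/net/tcp line; tcp4_seq_show() pads
 * shorter entries with spaces up to this width.
 */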
2313 #define TMPSZ 150
2314 
2315 static int tcp4_seq_show(struct seq_file *seq, void *v)
2316 {
2317 	struct tcp_iter_state *st;
2318 	int len;
2319 
2320 	if (v == SEQ_START_TOKEN) {
2321 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2322 			   "  sl  local_address rem_address   st tx_queue "
2323 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2324 			   "inode");
2325 		goto out;
2326 	}
2327 	st = seq->private;
2328 
2329 	switch (st->state) {
2330 	case TCP_SEQ_STATE_LISTENING:
2331 	case TCP_SEQ_STATE_ESTABLISHED:
2332 		get_tcp4_sock(v, seq, st->num, &len);
2333 		break;
2334 	case TCP_SEQ_STATE_OPENREQ:
2335 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2336 		break;
2337 	case TCP_SEQ_STATE_TIME_WAIT:
2338 		get_timewait4_sock(v, seq, st->num, &len);
2339 		break;
2340 	}
2341 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2342 out:
2343 	return 0;
2344 }
2345 
2346 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2347 	.name		= "tcp",
2348 	.family		= AF_INET,
2349 	.seq_fops	= {
2350 		.owner		= THIS_MODULE,
2351 	},
2352 	.seq_ops	= {
2353 		.show		= tcp4_seq_show,
2354 	},
2355 };
2356 
2357 static int tcp4_proc_init_net(struct net *net)
2358 {
2359 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2360 }
2361 
2362 static void tcp4_proc_exit_net(struct net *net)
2363 {
2364 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2365 }
2366 
2367 static struct pernet_operations tcp4_net_ops = {
2368 	.init = tcp4_proc_init_net,
2369 	.exit = tcp4_proc_exit_net,
2370 };
2371 
2372 int __init tcp4_proc_init(void)
2373 {
2374 	return register_pernet_subsys(&tcp4_net_ops);
2375 }
2376 
2377 void tcp4_proc_exit(void)
2378 {
2379 	unregister_pernet_subsys(&tcp4_net_ops);
2380 }
2381 #endif /* CONFIG_PROC_FS */
2382 
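/* GRO receive hook: when the device supplied a full checksum
 * (CHECKSUM_COMPLETE), verify it here; packets whose checksum cannot be
 * verified (CHECKSUM_NONE, or a mismatch) are flagged for flushing instead
 * of being aggregated.
 */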
2383 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2384 {
2385 	struct iphdr *iph = skb_gro_network_header(skb);
2386 
2387 	switch (skb->ip_summed) {
2388 	case CHECKSUM_COMPLETE:
2389 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2390 				  skb->csum)) {
2391 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2392 			break;
2393 		}
2394 
2395 		/* fall through */
2396 	case CHECKSUM_NONE:
2397 		NAPI_GRO_CB(skb)->flush = 1;
2398 		return NULL;
2399 	}
2400 
2401 	return tcp_gro_receive(head, skb);
2402 }
2403 EXPORT_SYMBOL(tcp4_gro_receive);
2404 
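/* GRO complete hook: recompute th->check as the pseudo-header checksum and
 * mark the merged skb as TCPv4 GSO so it can be resegmented and checksummed
 * later if needed.
 */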
2405 int tcp4_gro_complete(struct sk_buff *skb)
2406 {
2407 	struct iphdr *iph = ip_hdr(skb);
2408 	struct tcphdr *th = tcp_hdr(skb);
2409 
2410 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2411 				  iph->saddr, iph->daddr, 0);
2412 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2413 
2414 	return tcp_gro_complete(skb);
2415 }
2416 EXPORT_SYMBOL(tcp4_gro_complete);
2417 
2418 struct proto tcp_prot = {
2419 	.name			= "TCP",
2420 	.owner			= THIS_MODULE,
2421 	.close			= tcp_close,
2422 	.connect		= tcp_v4_connect,
2423 	.disconnect		= tcp_disconnect,
2424 	.accept			= inet_csk_accept,
2425 	.ioctl			= tcp_ioctl,
2426 	.init			= tcp_v4_init_sock,
2427 	.destroy		= tcp_v4_destroy_sock,
2428 	.shutdown		= tcp_shutdown,
2429 	.setsockopt		= tcp_setsockopt,
2430 	.getsockopt		= tcp_getsockopt,
2431 	.recvmsg		= tcp_recvmsg,
2432 	.backlog_rcv		= tcp_v4_do_rcv,
2433 	.hash			= inet_hash,
2434 	.unhash			= inet_unhash,
2435 	.get_port		= inet_csk_get_port,
2436 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2437 	.sockets_allocated	= &tcp_sockets_allocated,
2438 	.orphan_count		= &tcp_orphan_count,
2439 	.memory_allocated	= &tcp_memory_allocated,
2440 	.memory_pressure	= &tcp_memory_pressure,
2441 	.sysctl_mem		= sysctl_tcp_mem,
2442 	.sysctl_wmem		= sysctl_tcp_wmem,
2443 	.sysctl_rmem		= sysctl_tcp_rmem,
2444 	.max_header		= MAX_TCP_HEADER,
2445 	.obj_size		= sizeof(struct tcp_sock),
2446 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2447 	.twsk_prot		= &tcp_timewait_sock_ops,
2448 	.rsk_prot		= &tcp_request_sock_ops,
2449 	.h.hashinfo		= &tcp_hashinfo,
2450 #ifdef CONFIG_COMPAT
2451 	.compat_setsockopt	= compat_tcp_setsockopt,
2452 	.compat_getsockopt	= compat_tcp_getsockopt,
2453 #endif
2454 };
2455 
2456 
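/* Per-namespace setup: create the kernel control socket used to send RSTs
 * and ACKs that are not associated with a full socket
 * (e.g. tcp_v4_send_reset()).
 */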
2457 static int __net_init tcp_sk_init(struct net *net)
2458 {
2459 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2460 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2461 }
2462 
2463 static void __net_exit tcp_sk_exit(struct net *net)
2464 {
2465 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2466 	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
2467 }
2468 
2469 static struct pernet_operations __net_initdata tcp_sk_ops = {
2470 	.init = tcp_sk_init,
2471 	.exit = tcp_sk_exit,
2472 };
2473 
2474 void __init tcp_v4_init(void)
2475 {
2476 	inet_hashinfo_init(&tcp_hashinfo);
2477 	if (register_pernet_subsys(&tcp_sk_ops))
2478 		panic("Failed to create the TCP control socket.\n");
2479 }
2480 
2481 EXPORT_SYMBOL(ipv4_specific);
2482 EXPORT_SYMBOL(tcp_hashinfo);
2483 EXPORT_SYMBOL(tcp_prot);
2484 EXPORT_SYMBOL(tcp_v4_conn_request);
2485 EXPORT_SYMBOL(tcp_v4_connect);
2486 EXPORT_SYMBOL(tcp_v4_do_rcv);
2487 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2488 EXPORT_SYMBOL(tcp_v4_send_check);
2489 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2490 
2491 #ifdef CONFIG_PROC_FS
2492 EXPORT_SYMBOL(tcp_proc_register);
2493 EXPORT_SYMBOL(tcp_proc_unregister);
2494 #endif
2495 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2496 
2497