xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision 64c70b1c)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9  *
10  *		IPv4 specific functions
11  *
12  *
13  *		code split from:
14  *		linux/ipv4/tcp.c
15  *		linux/ipv4/tcp_input.c
16  *		linux/ipv4/tcp_output.c
17  *
18  *		See tcp.c for author information
19  *
20  *	This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  */
25 
26 /*
27  * Changes:
28  *		David S. Miller	:	New socket lookup architecture.
29  *					This code is dedicated to John Dyson.
30  *		David S. Miller :	Change semantics of established hash,
31  *					half is devoted to TIME_WAIT sockets
32  *					and the rest go in the other half.
33  *		Andi Kleen :		Add support for syncookies and fixed
34  *					some bugs: ip options weren't passed to
35  *					the TCP layer, missed a check for an
36  *					ACK bit.
37  *		Andi Kleen :		Implemented fast path mtu discovery.
38  *	     				Fixed many serious bugs in the
39  *					request_sock handling and moved
40  *					most of it into the af independent code.
41  *					Added tail drop and some other bugfixes.
42  *					Added new listen semantics.
43  *		Mike McLagan	:	Routing by source
44  *	Juan Jose Ciarlante:		ip_dynaddr bits
45  *		Andi Kleen:		various fixes.
46  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
47  *					coma.
48  *	Andi Kleen		:	Fix new listen.
49  *	Andi Kleen		:	Fix accept error reporting.
50  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
51  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
52  *					to a single port at the same time.
53  */
54 
55 
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 
65 #include <net/icmp.h>
66 #include <net/inet_hashtables.h>
67 #include <net/tcp.h>
68 #include <net/transp_v6.h>
69 #include <net/ipv6.h>
70 #include <net/inet_common.h>
71 #include <net/timewait_sock.h>
72 #include <net/xfrm.h>
73 #include <net/netdma.h>
74 
75 #include <linux/inet.h>
76 #include <linux/ipv6.h>
77 #include <linux/stddef.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 
81 #include <linux/crypto.h>
82 #include <linux/scatterlist.h>
83 
84 int sysctl_tcp_tw_reuse __read_mostly;
85 int sysctl_tcp_low_latency __read_mostly;
86 
87 /* Check TCP sequence numbers in ICMP packets. */
88 #define ICMP_MIN_LENGTH 8
89 
90 /* Socket used for sending RSTs */
91 static struct socket *tcp_socket __read_mostly;
92 
93 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
94 
95 #ifdef CONFIG_TCP_MD5SIG
96 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
97 						   __be32 addr);
98 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
99 				   __be32 saddr, __be32 daddr,
100 				   struct tcphdr *th, int protocol,
101 				   int tcplen);
102 #endif
103 
104 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
105 	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
106 	.lhash_users = ATOMIC_INIT(0),
107 	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
108 };
109 
110 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
111 {
112 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
113 				 inet_csk_bind_conflict);
114 }
115 
116 static void tcp_v4_hash(struct sock *sk)
117 {
118 	inet_hash(&tcp_hashinfo, sk);
119 }
120 
121 void tcp_unhash(struct sock *sk)
122 {
123 	inet_unhash(&tcp_hashinfo, sk);
124 }
125 
126 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
127 {
128 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
129 					  ip_hdr(skb)->saddr,
130 					  tcp_hdr(skb)->dest,
131 					  tcp_hdr(skb)->source);
132 }
133 
134 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
135 {
136 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
137 	struct tcp_sock *tp = tcp_sk(sk);
138 
139 	/* With PAWS, it is safe from the viewpoint
140 	   of data integrity. Even without PAWS it is safe provided the sequence
141 	   spaces do not overlap, i.e. at data rates <= 80Mbit/sec.
142 
143 	   Actually, the idea is close to VJ's, except that the timestamp cache
144 	   is held not per host but per port pair, and the TW bucket is used as
145 	   the state holder.
146 
147 	   If the TW bucket has already been destroyed we fall back to VJ's
148 	   scheme and use the initial timestamp retrieved from the peer table.
149 	 */
150 	if (tcptw->tw_ts_recent_stamp &&
151 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
152 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
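		/* Start the new connection's sequence space safely beyond
		 * anything the old TIME-WAIT connection could still have in
		 * flight: its snd_nxt plus a maximal unscaled window.
		 */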
153 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
154 		if (tp->write_seq == 0)
155 			tp->write_seq = 1;
156 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
157 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
158 		sock_hold(sktw);
159 		return 1;
160 	}
161 
162 	return 0;
163 }
164 
165 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
166 
167 /* This will initiate an outgoing connection. */
168 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
169 {
170 	struct inet_sock *inet = inet_sk(sk);
171 	struct tcp_sock *tp = tcp_sk(sk);
172 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
173 	struct rtable *rt;
174 	__be32 daddr, nexthop;
175 	int tmp;
176 	int err;
177 
178 	if (addr_len < sizeof(struct sockaddr_in))
179 		return -EINVAL;
180 
181 	if (usin->sin_family != AF_INET)
182 		return -EAFNOSUPPORT;
183 
184 	nexthop = daddr = usin->sin_addr.s_addr;
185 	if (inet->opt && inet->opt->srr) {
186 		if (!daddr)
187 			return -EINVAL;
188 		nexthop = inet->opt->faddr;
189 	}
190 
191 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
192 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
193 			       IPPROTO_TCP,
194 			       inet->sport, usin->sin_port, sk, 1);
195 	if (tmp < 0) {
196 		if (tmp == -ENETUNREACH)
197 			IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
198 		return tmp;
199 	}
200 
201 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
202 		ip_rt_put(rt);
203 		return -ENETUNREACH;
204 	}
205 
206 	if (!inet->opt || !inet->opt->srr)
207 		daddr = rt->rt_dst;
208 
209 	if (!inet->saddr)
210 		inet->saddr = rt->rt_src;
211 	inet->rcv_saddr = inet->saddr;
212 
213 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
214 		/* Reset inherited state */
215 		tp->rx_opt.ts_recent	   = 0;
216 		tp->rx_opt.ts_recent_stamp = 0;
217 		tp->write_seq		   = 0;
218 	}
219 
220 	if (tcp_death_row.sysctl_tw_recycle &&
221 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
222 		struct inet_peer *peer = rt_get_peer(rt);
223 		/*
224 		 * VJ's idea. We save the last timestamp seen from
225 		 * the destination in the peer table when entering state
226 		 * TIME-WAIT, and initialize rx_opt.ts_recent from it
227 		 * when trying a new connection.
228 		 */
229 		if (peer != NULL &&
230 		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
231 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
232 			tp->rx_opt.ts_recent = peer->tcp_ts;
233 		}
234 	}
235 
236 	inet->dport = usin->sin_port;
237 	inet->daddr = daddr;
238 
239 	inet_csk(sk)->icsk_ext_hdr_len = 0;
240 	if (inet->opt)
241 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
242 
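	/* Conservative default MSS: 536 = 576 (the minimum IPv4 reassembly
	 * buffer size every host must accept) minus 40 bytes of IP and TCP
	 * headers.
	 */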
243 	tp->rx_opt.mss_clamp = 536;
244 
245 	/* Socket identity is still unknown (sport may be zero).
246 	 * However we set the state to SYN-SENT and, without releasing the
247 	 * socket lock, select a source port, enter ourselves into the hash
248 	 * tables and complete initialization after this.
249 	 */
250 	tcp_set_state(sk, TCP_SYN_SENT);
251 	err = inet_hash_connect(&tcp_death_row, sk);
252 	if (err)
253 		goto failure;
254 
255 	err = ip_route_newports(&rt, IPPROTO_TCP,
256 				inet->sport, inet->dport, sk);
257 	if (err)
258 		goto failure;
259 
260 	/* OK, now commit destination to socket.  */
261 	sk->sk_gso_type = SKB_GSO_TCPV4;
262 	sk_setup_caps(sk, &rt->u.dst);
263 
264 	if (!tp->write_seq)
265 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
266 							   inet->daddr,
267 							   inet->sport,
268 							   usin->sin_port);
269 
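	/* Seed the per-socket IP ID counter with a connection-specific value. */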
270 	inet->id = tp->write_seq ^ jiffies;
271 
272 	err = tcp_connect(sk);
273 	rt = NULL;
274 	if (err)
275 		goto failure;
276 
277 	return 0;
278 
279 failure:
280 	/*
281 	 * This unhashes the socket and releases the local port,
282 	 * if necessary.
283 	 */
284 	tcp_set_state(sk, TCP_CLOSE);
285 	ip_rt_put(rt);
286 	sk->sk_route_caps = 0;
287 	inet->dport = 0;
288 	return err;
289 }
290 
291 /*
292  * This routine does path mtu discovery as defined in RFC1191.
293  */
294 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
295 {
296 	struct dst_entry *dst;
297 	struct inet_sock *inet = inet_sk(sk);
298 
299 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
300 	 * sent out by Linux are always < 576 bytes so they should go through
301 	 * unfragmented).
302 	 */
303 	if (sk->sk_state == TCP_LISTEN)
304 		return;
305 
306 	/* We don't check in the dst entry whether pmtu discovery is forbidden
307 	 * on this route. We just assume that no packet-too-big packets
308 	 * are sent back when pmtu discovery is not active.
309 	 * There is a small race when the user changes this flag in the
310 	 * route, but I think that's acceptable.
311 	 */
312 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
313 		return;
314 
315 	dst->ops->update_pmtu(dst, mtu);
316 
317 	/* Something is about to go wrong... Remember the soft error
318 	 * in case this connection is not able to recover.
319 	 */
320 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
321 		sk->sk_err_soft = EMSGSIZE;
322 
323 	mtu = dst_mtu(dst);
324 
325 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
326 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
327 		tcp_sync_mss(sk, mtu);
328 
329 		/* Resend the TCP packet because it's
330 		 * clear that the old packet has been
331 		 * dropped. This is the new "fast" path mtu
332 		 * discovery.
333 		 */
334 		tcp_simple_retransmit(sk);
335 	} /* else let the usual retransmit timer handle it */
336 }
337 
338 /*
339  * This routine is called by the ICMP module when it gets some
340  * sort of error condition.  If err < 0 then the socket should
341  * be closed and the error returned to the user.  If err > 0
342  * it's just the icmp type << 8 | icmp code.  After adjustment
343  * header points to the first 8 bytes of the tcp header.  We need
344  * to find the appropriate port.
345  *
346  * The locking strategy used here is very "optimistic". When
347  * someone else accesses the socket the ICMP is just dropped
348  * and for some paths there is no check at all.
349  * A more general error queue to queue errors for later handling
350  * is probably better.
351  *
352  */
353 
354 void tcp_v4_err(struct sk_buff *skb, u32 info)
355 {
356 	struct iphdr *iph = (struct iphdr *)skb->data;
357 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
358 	struct tcp_sock *tp;
359 	struct inet_sock *inet;
360 	const int type = icmp_hdr(skb)->type;
361 	const int code = icmp_hdr(skb)->code;
362 	struct sock *sk;
363 	__u32 seq;
364 	int err;
365 
366 	if (skb->len < (iph->ihl << 2) + 8) {
367 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
368 		return;
369 	}
370 
371 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
372 			 th->source, inet_iif(skb));
373 	if (!sk) {
374 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
375 		return;
376 	}
377 	if (sk->sk_state == TCP_TIME_WAIT) {
378 		inet_twsk_put(inet_twsk(sk));
379 		return;
380 	}
381 
382 	bh_lock_sock(sk);
383 	/* If too many ICMPs get dropped on busy
384 	 * servers this needs to be solved differently.
385 	 */
386 	if (sock_owned_by_user(sk))
387 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
388 
389 	if (sk->sk_state == TCP_CLOSE)
390 		goto out;
391 
392 	tp = tcp_sk(sk);
393 	seq = ntohl(th->seq);
394 	if (sk->sk_state != TCP_LISTEN &&
395 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
396 		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
397 		goto out;
398 	}
399 
400 	switch (type) {
401 	case ICMP_SOURCE_QUENCH:
402 		/* Just silently ignore these. */
403 		goto out;
404 	case ICMP_PARAMETERPROB:
405 		err = EPROTO;
406 		break;
407 	case ICMP_DEST_UNREACH:
408 		if (code > NR_ICMP_UNREACH)
409 			goto out;
410 
411 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
412 			if (!sock_owned_by_user(sk))
413 				do_pmtu_discovery(sk, iph, info);
414 			goto out;
415 		}
416 
417 		err = icmp_err_convert[code].errno;
418 		break;
419 	case ICMP_TIME_EXCEEDED:
420 		err = EHOSTUNREACH;
421 		break;
422 	default:
423 		goto out;
424 	}
425 
426 	switch (sk->sk_state) {
427 		struct request_sock *req, **prev;
428 	case TCP_LISTEN:
429 		if (sock_owned_by_user(sk))
430 			goto out;
431 
432 		req = inet_csk_search_req(sk, &prev, th->dest,
433 					  iph->daddr, iph->saddr);
434 		if (!req)
435 			goto out;
436 
437 		/* ICMPs are not backlogged, hence we cannot get
438 		   an established socket here.
439 		 */
440 		BUG_TRAP(!req->sk);
441 
442 		if (seq != tcp_rsk(req)->snt_isn) {
443 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
444 			goto out;
445 		}
446 
447 		/*
448 		 * Still in SYN_RECV, just remove it silently.
449 		 * There is no good way to pass the error to the newly
450 		 * created socket, and POSIX does not want network
451 		 * errors returned from accept().
452 		 */
453 		inet_csk_reqsk_queue_drop(sk, req, prev);
454 		goto out;
455 
456 	case TCP_SYN_SENT:
457 	case TCP_SYN_RECV:  /* Cannot happen normally.
458 			       It can, for example, if SYNs crossed.
459 			     */
460 		if (!sock_owned_by_user(sk)) {
461 			sk->sk_err = err;
462 
463 			sk->sk_error_report(sk);
464 
465 			tcp_done(sk);
466 		} else {
467 			sk->sk_err_soft = err;
468 		}
469 		goto out;
470 	}
471 
472 	/* If we've already connected we will keep trying
473 	 * until we time out, or the user gives up.
474 	 *
475 	 * rfc1122 4.2.3.9 allows hard errors to be raised only for
476 	 * PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
477 	 * but it is obsoleted by pmtu discovery).
478 	 *
479 	 * Note that in the modern internet, where routing is unreliable
480 	 * and broken firewalls sit in every dark corner sending random
481 	 * errors as ordered by their masters, even these two messages have
482 	 * finally lost their original sense (even Linux sends invalid PORT_UNREACHs)
483 	 *
484 	 * Now we are in compliance with RFCs.
485 	 *							--ANK (980905)
486 	 */
487 
488 	inet = inet_sk(sk);
489 	if (!sock_owned_by_user(sk) && inet->recverr) {
490 		sk->sk_err = err;
491 		sk->sk_error_report(sk);
492 	} else	{ /* Only an error on timeout */
493 		sk->sk_err_soft = err;
494 	}
495 
496 out:
497 	bh_unlock_sock(sk);
498 	sock_put(sk);
499 }
500 
501 /* This routine computes an IPv4 TCP checksum. */
502 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
503 {
504 	struct inet_sock *inet = inet_sk(sk);
505 	struct tcphdr *th = tcp_hdr(skb);
506 
507 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
508 		th->check = ~tcp_v4_check(len, inet->saddr,
509 					  inet->daddr, 0);
510 		skb->csum_start = skb_transport_header(skb) - skb->head;
511 		skb->csum_offset = offsetof(struct tcphdr, check);
512 	} else {
513 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
514 					 csum_partial((char *)th,
515 						      th->doff << 2,
516 						      skb->csum));
517 	}
518 }
519 
520 int tcp_v4_gso_send_check(struct sk_buff *skb)
521 {
522 	const struct iphdr *iph;
523 	struct tcphdr *th;
524 
525 	if (!pskb_may_pull(skb, sizeof(*th)))
526 		return -EINVAL;
527 
528 	iph = ip_hdr(skb);
529 	th = tcp_hdr(skb);
530 
531 	th->check = 0;
532 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
533 	skb->csum_start = skb_transport_header(skb) - skb->head;
534 	skb->csum_offset = offsetof(struct tcphdr, check);
535 	skb->ip_summed = CHECKSUM_PARTIAL;
536 	return 0;
537 }
538 
539 /*
540  *	This routine will send an RST to the other tcp.
541  *
542  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
543  *		      for the reset?
544  *	Answer: if a packet caused a RST, it is not for a socket
545  *		existing in our system; if it is matched to a socket,
546  *		it is just a duplicate segment or a bug in the other side's TCP.
547  *		So we build the reply based only on the parameters that
548  *		arrived with the segment.
549  *	Exception: precedence violation. We do not implement it in any case.
550  */
551 
552 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
553 {
554 	struct tcphdr *th = tcp_hdr(skb);
555 	struct {
556 		struct tcphdr th;
557 #ifdef CONFIG_TCP_MD5SIG
558 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
559 #endif
560 	} rep;
561 	struct ip_reply_arg arg;
562 #ifdef CONFIG_TCP_MD5SIG
563 	struct tcp_md5sig_key *key;
564 #endif
565 
566 	/* Never send a reset in response to a reset. */
567 	if (th->rst)
568 		return;
569 
570 	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
571 		return;
572 
573 	/* Swap the send and the receive. */
574 	memset(&rep, 0, sizeof(rep));
575 	rep.th.dest   = th->source;
576 	rep.th.source = th->dest;
577 	rep.th.doff   = sizeof(struct tcphdr) / 4;
578 	rep.th.rst    = 1;
579 
580 	if (th->ack) {
581 		rep.th.seq = th->ack_seq;
582 	} else {
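		/* The offending segment carried no ACK, so per RFC 793 the
		 * reset acknowledges everything it occupied in sequence
		 * space: its seq plus SYN/FIN flags plus payload length.
		 */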
583 		rep.th.ack = 1;
584 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
585 				       skb->len - (th->doff << 2));
586 	}
587 
588 	memset(&arg, 0, sizeof(arg));
589 	arg.iov[0].iov_base = (unsigned char *)&rep;
590 	arg.iov[0].iov_len  = sizeof(rep.th);
591 
592 #ifdef CONFIG_TCP_MD5SIG
593 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
594 	if (key) {
595 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
596 				   (TCPOPT_NOP << 16) |
597 				   (TCPOPT_MD5SIG << 8) |
598 				   TCPOLEN_MD5SIG);
599 		/* Update length and the length the header thinks exists */
600 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
601 		rep.th.doff = arg.iov[0].iov_len / 4;
602 
603 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
604 					key,
605 					ip_hdr(skb)->daddr,
606 					ip_hdr(skb)->saddr,
607 					&rep.th, IPPROTO_TCP,
608 					arg.iov[0].iov_len);
609 	}
610 #endif
611 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
612 				      ip_hdr(skb)->saddr, /* XXX */
613 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
614 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
615 
616 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
617 
618 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
619 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
620 }
621 
622 /* The code below, which sends ACKs in the SYN-RECV and TIME-WAIT states
623    outside socket context, is certainly ugly. What can I do?
624  */
625 
626 static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
627 			    struct sk_buff *skb, u32 seq, u32 ack,
628 			    u32 win, u32 ts)
629 {
630 	struct tcphdr *th = tcp_hdr(skb);
631 	struct {
632 		struct tcphdr th;
633 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
634 #ifdef CONFIG_TCP_MD5SIG
635 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
636 #endif
637 			];
638 	} rep;
639 	struct ip_reply_arg arg;
640 #ifdef CONFIG_TCP_MD5SIG
641 	struct tcp_md5sig_key *key;
642 	struct tcp_md5sig_key tw_key;
643 #endif
644 
645 	memset(&rep.th, 0, sizeof(struct tcphdr));
646 	memset(&arg, 0, sizeof(arg));
647 
648 	arg.iov[0].iov_base = (unsigned char *)&rep;
649 	arg.iov[0].iov_len  = sizeof(rep.th);
650 	if (ts) {
651 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
652 				   (TCPOPT_TIMESTAMP << 8) |
653 				   TCPOLEN_TIMESTAMP);
654 		rep.opt[1] = htonl(tcp_time_stamp);
655 		rep.opt[2] = htonl(ts);
656 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
657 	}
658 
659 	/* Swap the send and the receive. */
660 	rep.th.dest    = th->source;
661 	rep.th.source  = th->dest;
662 	rep.th.doff    = arg.iov[0].iov_len / 4;
663 	rep.th.seq     = htonl(seq);
664 	rep.th.ack_seq = htonl(ack);
665 	rep.th.ack     = 1;
666 	rep.th.window  = htons(win);
667 
668 #ifdef CONFIG_TCP_MD5SIG
669 	/*
670 	 * The SKB holds an incoming packet, but may not have a valid ->sk
671 	 * pointer. This is especially the case when we're dealing with a
672 	 * TIME_WAIT ack, because the sk structure is long gone, and only
673 	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
674 	 * structure, and we use it in preference.  I believe that (twsk ||
675 	 * skb->sk) holds true, but we program defensively.
676 	 */
677 	if (!twsk && skb->sk) {
678 		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
679 	} else if (twsk && twsk->tw_md5_keylen) {
680 		tw_key.key = twsk->tw_md5_key;
681 		tw_key.keylen = twsk->tw_md5_keylen;
682 		key = &tw_key;
683 	} else
684 		key = NULL;
685 
686 	if (key) {
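		/* If a timestamp option was written above it occupies
		 * rep.opt[0..2], so the MD5 option starts at index 3;
		 * otherwise it goes first.
		 */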
687 		int offset = (ts) ? 3 : 0;
688 
689 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
690 					  (TCPOPT_NOP << 16) |
691 					  (TCPOPT_MD5SIG << 8) |
692 					  TCPOLEN_MD5SIG);
693 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
694 		rep.th.doff = arg.iov[0].iov_len/4;
695 
696 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
697 					key,
698 					ip_hdr(skb)->daddr,
699 					ip_hdr(skb)->saddr,
700 					&rep.th, IPPROTO_TCP,
701 					arg.iov[0].iov_len);
702 	}
703 #endif
704 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
705 				      ip_hdr(skb)->saddr, /* XXX */
706 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
707 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
708 	if (twsk)
709 		arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
710 
711 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
712 
713 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
714 }
715 
716 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
717 {
718 	struct inet_timewait_sock *tw = inet_twsk(sk);
719 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
720 
721 	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
722 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
723 			tcptw->tw_ts_recent);
724 
725 	inet_twsk_put(tw);
726 }
727 
728 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
729 				  struct request_sock *req)
730 {
731 	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
732 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
733 			req->ts_recent);
734 }
735 
736 /*
737  *	Send a SYN-ACK after having received an ACK.
738  *	This still operates on a request_sock only, not on a big
739  *	socket.
740  */
741 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
742 			      struct dst_entry *dst)
743 {
744 	const struct inet_request_sock *ireq = inet_rsk(req);
745 	int err = -1;
746 	struct sk_buff * skb;
747 
748 	/* First, grab a route. */
749 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
750 		goto out;
751 
752 	skb = tcp_make_synack(sk, dst, req);
753 
754 	if (skb) {
755 		struct tcphdr *th = tcp_hdr(skb);
756 
757 		th->check = tcp_v4_check(skb->len,
758 					 ireq->loc_addr,
759 					 ireq->rmt_addr,
760 					 csum_partial((char *)th, skb->len,
761 						      skb->csum));
762 
763 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
764 					    ireq->rmt_addr,
765 					    ireq->opt);
766 		err = net_xmit_eval(err);
767 	}
768 
769 out:
770 	dst_release(dst);
771 	return err;
772 }
773 
774 /*
775  *	IPv4 request_sock destructor.
776  */
777 static void tcp_v4_reqsk_destructor(struct request_sock *req)
778 {
779 	kfree(inet_rsk(req)->opt);
780 }
781 
782 #ifdef CONFIG_SYN_COOKIES
783 static void syn_flood_warning(struct sk_buff *skb)
784 {
785 	static unsigned long warntime;
786 
787 	if (time_after(jiffies, (warntime + HZ * 60))) {
788 		warntime = jiffies;
789 		printk(KERN_INFO
790 		       "possible SYN flooding on port %d. Sending cookies.\n",
791 		       ntohs(tcp_hdr(skb)->dest));
792 	}
793 }
794 #endif
795 
796 /*
797  * Save and compile IPv4 options into the request_sock if needed.
798  */
799 static struct ip_options *tcp_v4_save_options(struct sock *sk,
800 					      struct sk_buff *skb)
801 {
802 	struct ip_options *opt = &(IPCB(skb)->opt);
803 	struct ip_options *dopt = NULL;
804 
805 	if (opt && opt->optlen) {
806 		int opt_size = optlength(opt);
807 		dopt = kmalloc(opt_size, GFP_ATOMIC);
808 		if (dopt) {
809 			if (ip_options_echo(dopt, skb)) {
810 				kfree(dopt);
811 				dopt = NULL;
812 			}
813 		}
814 	}
815 	return dopt;
816 }
817 
818 #ifdef CONFIG_TCP_MD5SIG
819 /*
820  * RFC2385 MD5 checksumming requires a mapping of
821  * IP address->MD5 Key.
822  * We need to maintain these in the sk structure.
823  */
824 
825 /* Find the Key structure for an address.  */
826 static struct tcp_md5sig_key *
827 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
828 {
829 	struct tcp_sock *tp = tcp_sk(sk);
830 	int i;
831 
832 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
833 		return NULL;
834 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
835 		if (tp->md5sig_info->keys4[i].addr == addr)
836 			return (struct tcp_md5sig_key *)
837 						&tp->md5sig_info->keys4[i];
838 	}
839 	return NULL;
840 }
841 
842 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
843 					 struct sock *addr_sk)
844 {
845 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
846 }
847 
848 EXPORT_SYMBOL(tcp_v4_md5_lookup);
849 
850 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
851 						      struct request_sock *req)
852 {
853 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
854 }
855 
856 /* This can be called on a newly created socket, from other files */
857 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
858 		      u8 *newkey, u8 newkeylen)
859 {
860 	/* Add Key to the list */
861 	struct tcp4_md5sig_key *key;
862 	struct tcp_sock *tp = tcp_sk(sk);
863 	struct tcp4_md5sig_key *keys;
864 
865 	key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
866 	if (key) {
867 		/* Pre-existing entry - just update that one. */
868 		kfree(key->key);
869 		key->key = newkey;
870 		key->keylen = newkeylen;
871 	} else {
872 		struct tcp_md5sig_info *md5sig;
873 
874 		if (!tp->md5sig_info) {
875 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
876 						  GFP_ATOMIC);
877 			if (!tp->md5sig_info) {
878 				kfree(newkey);
879 				return -ENOMEM;
880 			}
881 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
882 		}
883 		if (tcp_alloc_md5sig_pool() == NULL) {
884 			kfree(newkey);
885 			return -ENOMEM;
886 		}
887 		md5sig = tp->md5sig_info;
888 
889 		if (md5sig->alloced4 == md5sig->entries4) {
890 			keys = kmalloc((sizeof(*keys) *
891 					(md5sig->entries4 + 1)), GFP_ATOMIC);
892 			if (!keys) {
893 				kfree(newkey);
894 				tcp_free_md5sig_pool();
895 				return -ENOMEM;
896 			}
897 
898 			if (md5sig->entries4)
899 				memcpy(keys, md5sig->keys4,
900 				       sizeof(*keys) * md5sig->entries4);
901 
902 			/* Free old key list, and reference new one */
903 			if (md5sig->keys4)
904 				kfree(md5sig->keys4);
905 			md5sig->keys4 = keys;
906 			md5sig->alloced4++;
907 		}
908 		md5sig->entries4++;
909 		md5sig->keys4[md5sig->entries4 - 1].addr   = addr;
910 		md5sig->keys4[md5sig->entries4 - 1].key    = newkey;
911 		md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen;
912 	}
913 	return 0;
914 }
915 
916 EXPORT_SYMBOL(tcp_v4_md5_do_add);
917 
918 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
919 			       u8 *newkey, u8 newkeylen)
920 {
921 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
922 				 newkey, newkeylen);
923 }
924 
925 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
926 {
927 	struct tcp_sock *tp = tcp_sk(sk);
928 	int i;
929 
930 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
931 		if (tp->md5sig_info->keys4[i].addr == addr) {
932 			/* Free the key */
933 			kfree(tp->md5sig_info->keys4[i].key);
934 			tp->md5sig_info->entries4--;
935 
936 			if (tp->md5sig_info->entries4 == 0) {
937 				kfree(tp->md5sig_info->keys4);
938 				tp->md5sig_info->keys4 = NULL;
939 				tp->md5sig_info->alloced4 = 0;
940 			} else if (tp->md5sig_info->entries4 != i) {
941 				/* Shift the remaining entries down to fill the hole */
942 				memcpy(&tp->md5sig_info->keys4[i],
943 				       &tp->md5sig_info->keys4[i+1],
944 				       (tp->md5sig_info->entries4 - i) *
945 					sizeof(struct tcp4_md5sig_key));
946 			}
947 			tcp_free_md5sig_pool();
948 			return 0;
949 		}
950 	}
951 	return -ENOENT;
952 }
953 
954 EXPORT_SYMBOL(tcp_v4_md5_do_del);
955 
956 static void tcp_v4_clear_md5_list(struct sock *sk)
957 {
958 	struct tcp_sock *tp = tcp_sk(sk);
959 
960 	/* Free each key, then the set of keys,
961 	 * the crypto element, and then decrement our
962 	 * hold on the last resort crypto.
963 	 */
964 	if (tp->md5sig_info->entries4) {
965 		int i;
966 		for (i = 0; i < tp->md5sig_info->entries4; i++)
967 			kfree(tp->md5sig_info->keys4[i].key);
968 		tp->md5sig_info->entries4 = 0;
969 		tcp_free_md5sig_pool();
970 	}
971 	if (tp->md5sig_info->keys4) {
972 		kfree(tp->md5sig_info->keys4);
973 		tp->md5sig_info->keys4 = NULL;
974 		tp->md5sig_info->alloced4  = 0;
975 	}
976 }
977 
978 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
979 				 int optlen)
980 {
981 	struct tcp_md5sig cmd;
982 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
983 	u8 *newkey;
984 
985 	if (optlen < sizeof(cmd))
986 		return -EINVAL;
987 
988 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
989 		return -EFAULT;
990 
991 	if (sin->sin_family != AF_INET)
992 		return -EINVAL;
993 
994 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
995 		if (!tcp_sk(sk)->md5sig_info)
996 			return -ENOENT;
997 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
998 	}
999 
1000 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1001 		return -EINVAL;
1002 
1003 	if (!tcp_sk(sk)->md5sig_info) {
1004 		struct tcp_sock *tp = tcp_sk(sk);
1005 		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
1006 
1007 		if (!p)
1008 			return -EINVAL;
1009 
1010 		tp->md5sig_info = p;
1011 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1012 	}
1013 
1014 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
1015 	if (!newkey)
1016 		return -ENOMEM;
1017 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1018 				 newkey, cmd.tcpm_keylen);
1019 }
1020 
1021 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1022 				   __be32 saddr, __be32 daddr,
1023 				   struct tcphdr *th, int protocol,
1024 				   int tcplen)
1025 {
1026 	struct scatterlist sg[4];
1027 	__u16 data_len;
1028 	int block = 0;
1029 	__sum16 old_checksum;
1030 	struct tcp_md5sig_pool *hp;
1031 	struct tcp4_pseudohdr *bp;
1032 	struct hash_desc *desc;
1033 	int err;
1034 	unsigned int nbytes = 0;
1035 
1036 	/*
1037 	 * Okay, so RFC2385 is turned on for this connection,
1038 	 * so we need to generate the MD5 hash for the packet now.
1039 	 */
1040 
1041 	hp = tcp_get_md5sig_pool();
1042 	if (!hp)
1043 		goto clear_hash_noput;
1044 
1045 	bp = &hp->md5_blk.ip4;
1046 	desc = &hp->md5_desc;
1047 
1048 	/*
1049 	 * 1. the TCP pseudo-header (in the order: source IP address,
1050 	 * destination IP address, zero-padded protocol number, and
1051 	 * segment length)
1052 	 */
1053 	bp->saddr = saddr;
1054 	bp->daddr = daddr;
1055 	bp->pad = 0;
1056 	bp->protocol = protocol;
1057 	bp->len = htons(tcplen);
1058 	sg_set_buf(&sg[block++], bp, sizeof(*bp));
1059 	nbytes += sizeof(*bp);
1060 
1061 	/* 2. the TCP header, excluding options, and assuming a
1062 	 * checksum of zero.
1063 	 */
1064 	old_checksum = th->check;
1065 	th->check = 0;
1066 	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1067 	nbytes += sizeof(struct tcphdr);
1068 
1069 	/* 3. the TCP segment data (if any) */
1070 	data_len = tcplen - (th->doff << 2);
1071 	if (data_len > 0) {
1072 		unsigned char *data = (unsigned char *)th + (th->doff << 2);
1073 		sg_set_buf(&sg[block++], data, data_len);
1074 		nbytes += data_len;
1075 	}
1076 
1077 	/* 4. an independently-specified key or password, known to both
1078 	 * TCPs and presumably connection-specific
1079 	 */
1080 	sg_set_buf(&sg[block++], key->key, key->keylen);
1081 	nbytes += key->keylen;
1082 
1083 	/* Now store the Hash into the packet */
1084 	err = crypto_hash_init(desc);
1085 	if (err)
1086 		goto clear_hash;
1087 	err = crypto_hash_update(desc, sg, nbytes);
1088 	if (err)
1089 		goto clear_hash;
1090 	err = crypto_hash_final(desc, md5_hash);
1091 	if (err)
1092 		goto clear_hash;
1093 
1094 	/* Reset header, and free up the crypto */
1095 	tcp_put_md5sig_pool();
1096 	th->check = old_checksum;
1097 
1098 out:
1099 	return 0;
1100 clear_hash:
1101 	tcp_put_md5sig_pool();
1102 clear_hash_noput:
1103 	memset(md5_hash, 0, 16);
1104 	goto out;
1105 }
1106 
1107 int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1108 			 struct sock *sk,
1109 			 struct dst_entry *dst,
1110 			 struct request_sock *req,
1111 			 struct tcphdr *th, int protocol,
1112 			 int tcplen)
1113 {
1114 	__be32 saddr, daddr;
1115 
1116 	if (sk) {
1117 		saddr = inet_sk(sk)->saddr;
1118 		daddr = inet_sk(sk)->daddr;
1119 	} else {
1120 		struct rtable *rt = (struct rtable *)dst;
1121 		BUG_ON(!rt);
1122 		saddr = rt->rt_src;
1123 		daddr = rt->rt_dst;
1124 	}
1125 	return tcp_v4_do_calc_md5_hash(md5_hash, key,
1126 				       saddr, daddr,
1127 				       th, protocol, tcplen);
1128 }
1129 
1130 EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
1131 
1132 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1133 {
1134 	/*
1135 	 * This gets called for each TCP segment that arrives
1136 	 * so we want to be efficient.
1137 	 * We have 3 drop cases:
1138 	 * o No MD5 hash and one expected.
1139 	 * o MD5 hash and we're not expecting one.
1140 	 * o MD5 hash and it's wrong.
1141 	 */
1142 	__u8 *hash_location = NULL;
1143 	struct tcp_md5sig_key *hash_expected;
1144 	const struct iphdr *iph = ip_hdr(skb);
1145 	struct tcphdr *th = tcp_hdr(skb);
1146 	int length = (th->doff << 2) - sizeof(struct tcphdr);
1147 	int genhash;
1148 	unsigned char *ptr;
1149 	unsigned char newhash[16];
1150 
1151 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1152 
1153 	/*
1154 	 * If the TCP option length is less than the TCP_MD5SIG
1155 	 * option length, then we can shortcut
1156 	 */
1157 	if (length < TCPOLEN_MD5SIG) {
1158 		if (hash_expected)
1159 			return 1;
1160 		else
1161 			return 0;
1162 	}
1163 
1164 	/* Okay, we can't shortcut - we have to grub through the options */
1165 	ptr = (unsigned char *)(th + 1);
1166 	while (length > 0) {
1167 		int opcode = *ptr++;
1168 		int opsize;
1169 
1170 		switch (opcode) {
1171 		case TCPOPT_EOL:
1172 			goto done_opts;
1173 		case TCPOPT_NOP:
1174 			length--;
1175 			continue;
1176 		default:
1177 			opsize = *ptr++;
1178 			if (opsize < 2)
1179 				goto done_opts;
1180 			if (opsize > length)
1181 				goto done_opts;
1182 
1183 			if (opcode == TCPOPT_MD5SIG) {
1184 				hash_location = ptr;
1185 				goto done_opts;
1186 			}
1187 		}
1188 		ptr += opsize-2;
1189 		length -= opsize;
1190 	}
1191 done_opts:
1192 	/* We've parsed the options - do we have a hash? */
1193 	if (!hash_expected && !hash_location)
1194 		return 0;
1195 
1196 	if (hash_expected && !hash_location) {
1197 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
1198 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1199 			       NIPQUAD(iph->saddr), ntohs(th->source),
1200 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1201 		return 1;
1202 	}
1203 
1204 	if (!hash_expected && hash_location) {
1205 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
1206 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1207 			       NIPQUAD(iph->saddr), ntohs(th->source),
1208 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1209 		return 1;
1210 	}
1211 
1212 	/* Okay, so this is hash_expected and hash_location -
1213 	 * so we need to calculate the hash.
1214 	 */
1215 	genhash = tcp_v4_do_calc_md5_hash(newhash,
1216 					  hash_expected,
1217 					  iph->saddr, iph->daddr,
1218 					  th, sk->sk_protocol,
1219 					  skb->len);
1220 
1221 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1222 		if (net_ratelimit()) {
1223 			printk(KERN_INFO "MD5 Hash failed for "
1224 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1225 			       NIPQUAD(iph->saddr), ntohs(th->source),
1226 			       NIPQUAD(iph->daddr), ntohs(th->dest),
1227 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1228 		}
1229 		return 1;
1230 	}
1231 	return 0;
1232 }
1233 
1234 #endif
1235 
1236 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1237 	.family		=	PF_INET,
1238 	.obj_size	=	sizeof(struct tcp_request_sock),
1239 	.rtx_syn_ack	=	tcp_v4_send_synack,
1240 	.send_ack	=	tcp_v4_reqsk_send_ack,
1241 	.destructor	=	tcp_v4_reqsk_destructor,
1242 	.send_reset	=	tcp_v4_send_reset,
1243 };
1244 
1245 #ifdef CONFIG_TCP_MD5SIG
1246 static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1247 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1248 };
1249 #endif
1250 
1251 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1252 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1253 	.twsk_unique	= tcp_twsk_unique,
1254 	.twsk_destructor= tcp_twsk_destructor,
1255 };
1256 
1257 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1258 {
1259 	struct inet_request_sock *ireq;
1260 	struct tcp_options_received tmp_opt;
1261 	struct request_sock *req;
1262 	__be32 saddr = ip_hdr(skb)->saddr;
1263 	__be32 daddr = ip_hdr(skb)->daddr;
1264 	__u32 isn = TCP_SKB_CB(skb)->when;
1265 	struct dst_entry *dst = NULL;
1266 #ifdef CONFIG_SYN_COOKIES
1267 	int want_cookie = 0;
1268 #else
1269 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1270 #endif
1271 
1272 	/* Never answer SYNs sent to broadcast or multicast */
1273 	if (((struct rtable *)skb->dst)->rt_flags &
1274 	    (RTCF_BROADCAST | RTCF_MULTICAST))
1275 		goto drop;
1276 
1277 	/* TW buckets are converted to open requests without
1278 	 * limitation: they conserve resources and the peer is
1279 	 * evidently a real one.
1280 	 */
1281 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1282 #ifdef CONFIG_SYN_COOKIES
1283 		if (sysctl_tcp_syncookies) {
1284 			want_cookie = 1;
1285 		} else
1286 #endif
1287 		goto drop;
1288 	}
1289 
1290 	/* Accept backlog is full. If we have already queued enough
1291 	 * warm entries in the syn queue, drop the request. It is better than
1292 	 * clogging the syn queue with openreqs with an exponentially increasing
1293 	 * timeout.
1294 	 */
1295 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1296 		goto drop;
1297 
1298 	req = reqsk_alloc(&tcp_request_sock_ops);
1299 	if (!req)
1300 		goto drop;
1301 
1302 #ifdef CONFIG_TCP_MD5SIG
1303 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1304 #endif
1305 
1306 	tcp_clear_options(&tmp_opt);
1307 	tmp_opt.mss_clamp = 536;
1308 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1309 
1310 	tcp_parse_options(skb, &tmp_opt, 0);
1311 
1312 	if (want_cookie) {
1313 		tcp_clear_options(&tmp_opt);
1314 		tmp_opt.saw_tstamp = 0;
1315 	}
1316 
1317 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1318 		/* Some OSes (unknown ones, but I see them on a web server which
1319 		 * contains information interesting only for windows
1320 		 * users) do not send their timestamp in the SYN. It is the easy case.
1321 		 * We simply do not advertise TS support.
1322 		 */
1323 		tmp_opt.saw_tstamp = 0;
1324 		tmp_opt.tstamp_ok  = 0;
1325 	}
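	/* Only advertise timestamp support back if the peer offered it in its SYN. */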
1326 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1327 
1328 	tcp_openreq_init(req, &tmp_opt, skb);
1329 
1330 	if (security_inet_conn_request(sk, skb, req))
1331 		goto drop_and_free;
1332 
1333 	ireq = inet_rsk(req);
1334 	ireq->loc_addr = daddr;
1335 	ireq->rmt_addr = saddr;
1336 	ireq->opt = tcp_v4_save_options(sk, skb);
1337 	if (!want_cookie)
1338 		TCP_ECN_create_request(req, tcp_hdr(skb));
1339 
1340 	if (want_cookie) {
1341 #ifdef CONFIG_SYN_COOKIES
1342 		syn_flood_warning(skb);
1343 #endif
1344 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1345 	} else if (!isn) {
1346 		struct inet_peer *peer = NULL;
1347 
1348 		/* VJ's idea. We save the last timestamp seen
1349 		 * from the destination in the peer table when entering
1350 		 * state TIME-WAIT, and check against it before
1351 		 * accepting a new connection request.
1352 		 *
1353 		 * If "isn" is not zero, this request hit an alive
1354 		 * timewait bucket, so all the necessary checks
1355 		 * are made in the function processing the timewait state.
1356 		 */
1357 		if (tmp_opt.saw_tstamp &&
1358 		    tcp_death_row.sysctl_tw_recycle &&
1359 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1360 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1361 		    peer->v4daddr == saddr) {
1362 			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1363 			    (s32)(peer->tcp_ts - req->ts_recent) >
1364 							TCP_PAWS_WINDOW) {
1365 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1366 				dst_release(dst);
1367 				goto drop_and_free;
1368 			}
1369 		}
1370 		/* Kill the following clause, if you dislike this way. */
1371 		else if (!sysctl_tcp_syncookies &&
1372 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1373 			  (sysctl_max_syn_backlog >> 2)) &&
1374 			 (!peer || !peer->tcp_ts_stamp) &&
1375 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1376 			/* Without syncookies the last quarter of the
1377 			 * backlog is reserved for destinations
1378 			 * proven to be alive.
1379 			 * It means that we continue to communicate
1380 			 * with destinations already remembered
1381 			 * at the moment of the synflood.
1382 			 */
1383 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1384 				       "request from %u.%u.%u.%u/%u\n",
1385 				       NIPQUAD(saddr),
1386 				       ntohs(tcp_hdr(skb)->source));
1387 			dst_release(dst);
1388 			goto drop_and_free;
1389 		}
1390 
1391 		isn = tcp_v4_init_sequence(skb);
1392 	}
1393 	tcp_rsk(req)->snt_isn = isn;
1394 
1395 	if (tcp_v4_send_synack(sk, req, dst))
1396 		goto drop_and_free;
1397 
1398 	if (want_cookie) {
1399 		reqsk_free(req);
1400 	} else {
1401 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1402 	}
1403 	return 0;
1404 
1405 drop_and_free:
1406 	reqsk_free(req);
1407 drop:
1408 	return 0;
1409 }
1410 
1411 
1412 /*
1413  * The three way handshake has completed - we got a valid ACK -
1414  * now create the new socket.
1415  */
1416 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1417 				  struct request_sock *req,
1418 				  struct dst_entry *dst)
1419 {
1420 	struct inet_request_sock *ireq;
1421 	struct inet_sock *newinet;
1422 	struct tcp_sock *newtp;
1423 	struct sock *newsk;
1424 #ifdef CONFIG_TCP_MD5SIG
1425 	struct tcp_md5sig_key *key;
1426 #endif
1427 
1428 	if (sk_acceptq_is_full(sk))
1429 		goto exit_overflow;
1430 
1431 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1432 		goto exit;
1433 
1434 	newsk = tcp_create_openreq_child(sk, req, skb);
1435 	if (!newsk)
1436 		goto exit;
1437 
1438 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1439 	sk_setup_caps(newsk, dst);
1440 
1441 	newtp		      = tcp_sk(newsk);
1442 	newinet		      = inet_sk(newsk);
1443 	ireq		      = inet_rsk(req);
1444 	newinet->daddr	      = ireq->rmt_addr;
1445 	newinet->rcv_saddr    = ireq->loc_addr;
1446 	newinet->saddr	      = ireq->loc_addr;
1447 	newinet->opt	      = ireq->opt;
1448 	ireq->opt	      = NULL;
1449 	newinet->mc_index     = inet_iif(skb);
1450 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1451 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1452 	if (newinet->opt)
1453 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1454 	newinet->id = newtp->write_seq ^ jiffies;
1455 
1456 	tcp_mtup_init(newsk);
1457 	tcp_sync_mss(newsk, dst_mtu(dst));
1458 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1459 	tcp_initialize_rcv_mss(newsk);
1460 
1461 #ifdef CONFIG_TCP_MD5SIG
1462 	/* Copy over the MD5 key from the original socket */
1463 	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1464 		/*
1465 		 * We're using one, so create a matching key
1466 		 * on the newsk structure. If we fail to get
1467 		 * memory, then we end up not copying the key
1468 		 * across. Shucks.
1469 		 */
1470 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1471 		if (newkey != NULL)
1472 			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1473 					  newkey, key->keylen);
1474 	}
1475 #endif
1476 
1477 	__inet_hash(&tcp_hashinfo, newsk, 0);
1478 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
1479 
1480 	return newsk;
1481 
1482 exit_overflow:
1483 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1484 exit:
1485 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1486 	dst_release(dst);
1487 	return NULL;
1488 }
1489 
1490 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1491 {
1492 	struct tcphdr *th = tcp_hdr(skb);
1493 	const struct iphdr *iph = ip_hdr(skb);
1494 	struct sock *nsk;
1495 	struct request_sock **prev;
1496 	/* Find possible connection requests. */
1497 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1498 						       iph->saddr, iph->daddr);
1499 	if (req)
1500 		return tcp_check_req(sk, skb, req, prev);
1501 
1502 	nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
1503 				      iph->daddr, th->dest, inet_iif(skb));
1504 
1505 	if (nsk) {
1506 		if (nsk->sk_state != TCP_TIME_WAIT) {
1507 			bh_lock_sock(nsk);
1508 			return nsk;
1509 		}
1510 		inet_twsk_put(inet_twsk(nsk));
1511 		return NULL;
1512 	}
1513 
1514 #ifdef CONFIG_SYN_COOKIES
1515 	if (!th->rst && !th->syn && th->ack)
1516 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1517 #endif
1518 	return sk;
1519 }
1520 
1521 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1522 {
1523 	const struct iphdr *iph = ip_hdr(skb);
1524 
1525 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1526 		if (!tcp_v4_check(skb->len, iph->saddr,
1527 				  iph->daddr, skb->csum)) {
1528 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1529 			return 0;
1530 		}
1531 	}
1532 
1533 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1534 				       skb->len, IPPROTO_TCP, 0);
1535 
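	/* For short segments it is presumably cheaper to verify the
	 * checksum right here than to defer it until the data is copied.
	 */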
1536 	if (skb->len <= 76) {
1537 		return __skb_checksum_complete(skb);
1538 	}
1539 	return 0;
1540 }
1541 
1542 
1543 /* The socket must have its spinlock held when we get
1544  * here.
1545  *
1546  * We have a potential double-lock case here, so even when
1547  * doing backlog processing we use the BH locking scheme.
1548  * This is because we cannot sleep with the original spinlock
1549  * held.
1550  */
1551 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1552 {
1553 	struct sock *rsk;
1554 #ifdef CONFIG_TCP_MD5SIG
1555 	/*
1556 	 * We really want to reject the packet as early as possible
1557 	 * if:
1558 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1559 	 *  o There is an MD5 option and we're not expecting one
1560 	 */
1561 	if (tcp_v4_inbound_md5_hash(sk, skb))
1562 		goto discard;
1563 #endif
1564 
1565 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1566 		TCP_CHECK_TIMER(sk);
1567 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1568 			rsk = sk;
1569 			goto reset;
1570 		}
1571 		TCP_CHECK_TIMER(sk);
1572 		return 0;
1573 	}
1574 
1575 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1576 		goto csum_err;
1577 
1578 	if (sk->sk_state == TCP_LISTEN) {
1579 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1580 		if (!nsk)
1581 			goto discard;
1582 
1583 		if (nsk != sk) {
1584 			if (tcp_child_process(sk, nsk, skb)) {
1585 				rsk = nsk;
1586 				goto reset;
1587 			}
1588 			return 0;
1589 		}
1590 	}
1591 
1592 	TCP_CHECK_TIMER(sk);
1593 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1594 		rsk = sk;
1595 		goto reset;
1596 	}
1597 	TCP_CHECK_TIMER(sk);
1598 	return 0;
1599 
1600 reset:
1601 	tcp_v4_send_reset(rsk, skb);
1602 discard:
1603 	kfree_skb(skb);
1604 	/* Be careful here. If this function gets more complicated and
1605 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1606 	 * might be destroyed here. This current version compiles correctly,
1607 	 * but you have been warned.
1608 	 */
1609 	return 0;
1610 
1611 csum_err:
1612 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1613 	goto discard;
1614 }
1615 
1616 /*
1617  *	From tcp_input.c
1618  */
1619 
1620 int tcp_v4_rcv(struct sk_buff *skb)
1621 {
1622 	const struct iphdr *iph;
1623 	struct tcphdr *th;
1624 	struct sock *sk;
1625 	int ret;
1626 
1627 	if (skb->pkt_type != PACKET_HOST)
1628 		goto discard_it;
1629 
1630 	/* Count it even if it's bad */
1631 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1632 
1633 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1634 		goto discard_it;
1635 
1636 	th = tcp_hdr(skb);
1637 
1638 	if (th->doff < sizeof(struct tcphdr) / 4)
1639 		goto bad_packet;
1640 	if (!pskb_may_pull(skb, th->doff * 4))
1641 		goto discard_it;
1642 
1643 	/* An explanation is required here, I think.
1644 	 * Packet length and doff are validated by header prediction,
1645 	 * provided the case of th->doff==0 is eliminated.
1646 	 * So, we defer the checks. */
1647 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1648 		goto bad_packet;
1649 
1650 	th = tcp_hdr(skb);
1651 	iph = ip_hdr(skb);
1652 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1653 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1654 				    skb->len - th->doff * 4);
1655 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1656 	TCP_SKB_CB(skb)->when	 = 0;
1657 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1658 	TCP_SKB_CB(skb)->sacked	 = 0;
1659 
1660 	sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
1661 			   iph->daddr, th->dest, inet_iif(skb));
1662 	if (!sk)
1663 		goto no_tcp_socket;
1664 
1665 process:
1666 	if (sk->sk_state == TCP_TIME_WAIT)
1667 		goto do_time_wait;
1668 
1669 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1670 		goto discard_and_relse;
1671 	nf_reset(skb);
1672 
1673 	if (sk_filter(sk, skb))
1674 		goto discard_and_relse;
1675 
1676 	skb->dev = NULL;
1677 
1678 	bh_lock_sock_nested(sk);
1679 	ret = 0;
1680 	if (!sock_owned_by_user(sk)) {
1681 #ifdef CONFIG_NET_DMA
1682 		struct tcp_sock *tp = tcp_sk(sk);
1683 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1684 			tp->ucopy.dma_chan = get_softnet_dma();
1685 		if (tp->ucopy.dma_chan)
1686 			ret = tcp_v4_do_rcv(sk, skb);
1687 		else
1688 #endif
1689 		{
1690 			if (!tcp_prequeue(sk, skb))
1691 				ret = tcp_v4_do_rcv(sk, skb);
1692 		}
1693 	} else
1694 		sk_add_backlog(sk, skb);
1695 	bh_unlock_sock(sk);
1696 
1697 	sock_put(sk);
1698 
1699 	return ret;
1700 
1701 no_tcp_socket:
1702 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1703 		goto discard_it;
1704 
1705 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1706 bad_packet:
1707 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1708 	} else {
1709 		tcp_v4_send_reset(NULL, skb);
1710 	}
1711 
1712 discard_it:
1713 	/* Discard frame. */
1714 	kfree_skb(skb);
1715 	return 0;
1716 
1717 discard_and_relse:
1718 	sock_put(sk);
1719 	goto discard_it;
1720 
1721 do_time_wait:
1722 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1723 		inet_twsk_put(inet_twsk(sk));
1724 		goto discard_it;
1725 	}
1726 
1727 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1728 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1729 		inet_twsk_put(inet_twsk(sk));
1730 		goto discard_it;
1731 	}
1732 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1733 	case TCP_TW_SYN: {
1734 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1735 							iph->daddr, th->dest,
1736 							inet_iif(skb));
1737 		if (sk2) {
1738 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1739 			inet_twsk_put(inet_twsk(sk));
1740 			sk = sk2;
1741 			goto process;
1742 		}
1743 		/* Fall through to ACK */
1744 	}
1745 	case TCP_TW_ACK:
1746 		tcp_v4_timewait_ack(sk, skb);
1747 		break;
1748 	case TCP_TW_RST:
1749 		goto no_tcp_socket;
1750 	case TCP_TW_SUCCESS:;
1751 	}
1752 	goto discard_it;
1753 }
1754 
1755 /* VJ's idea. Save last timestamp seen from this destination
1756  * and hold it at least for the normal timewait interval, to use for duplicate
1757  * segment detection in subsequent connections, before they enter synchronized
1758  * state.
1759  */
1760 
1761 int tcp_v4_remember_stamp(struct sock *sk)
1762 {
1763 	struct inet_sock *inet = inet_sk(sk);
1764 	struct tcp_sock *tp = tcp_sk(sk);
1765 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1766 	struct inet_peer *peer = NULL;
1767 	int release_it = 0;
1768 
1769 	if (!rt || rt->rt_dst != inet->daddr) {
1770 		peer = inet_getpeer(inet->daddr, 1);
1771 		release_it = 1;
1772 	} else {
1773 		if (!rt->peer)
1774 			rt_bind_peer(rt, 1);
1775 		peer = rt->peer;
1776 	}
1777 
1778 	if (peer) {
1779 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1780 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1781 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1782 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1783 			peer->tcp_ts = tp->rx_opt.ts_recent;
1784 		}
1785 		if (release_it)
1786 			inet_putpeer(peer);
1787 		return 1;
1788 	}
1789 
1790 	return 0;
1791 }
1792 
1793 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1794 {
1795 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1796 
1797 	if (peer) {
1798 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1799 
1800 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1801 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1802 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1803 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1804 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1805 		}
1806 		inet_putpeer(peer);
1807 		return 1;
1808 	}
1809 
1810 	return 0;
1811 }
1812 
1813 struct inet_connection_sock_af_ops ipv4_specific = {
1814 	.queue_xmit	   = ip_queue_xmit,
1815 	.send_check	   = tcp_v4_send_check,
1816 	.rebuild_header	   = inet_sk_rebuild_header,
1817 	.conn_request	   = tcp_v4_conn_request,
1818 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1819 	.remember_stamp	   = tcp_v4_remember_stamp,
1820 	.net_header_len	   = sizeof(struct iphdr),
1821 	.setsockopt	   = ip_setsockopt,
1822 	.getsockopt	   = ip_getsockopt,
1823 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1824 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1825 #ifdef CONFIG_COMPAT
1826 	.compat_setsockopt = compat_ip_setsockopt,
1827 	.compat_getsockopt = compat_ip_getsockopt,
1828 #endif
1829 };
1830 
1831 #ifdef CONFIG_TCP_MD5SIG
1832 static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1833 	.md5_lookup		= tcp_v4_md5_lookup,
1834 	.calc_md5_hash		= tcp_v4_calc_md5_hash,
1835 	.md5_add		= tcp_v4_md5_add_func,
1836 	.md5_parse		= tcp_v4_parse_md5_keys,
1837 };
1838 #endif
1839 
1840 /* NOTE: A lot of things are set to zero explicitly by the call to
1841  *       sk_alloc(), so they need not be done here.
1842  */
1843 static int tcp_v4_init_sock(struct sock *sk)
1844 {
1845 	struct inet_connection_sock *icsk = inet_csk(sk);
1846 	struct tcp_sock *tp = tcp_sk(sk);
1847 
1848 	skb_queue_head_init(&tp->out_of_order_queue);
1849 	tcp_init_xmit_timers(sk);
1850 	tcp_prequeue_init(tp);
1851 
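	/* No RTT samples yet: start the retransmission timeout and the
	 * RTT deviation estimate at the initial (conservative) value.
	 */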
1852 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1853 	tp->mdev = TCP_TIMEOUT_INIT;
1854 
1855 	/* So many TCP implementations out there (incorrectly) count the
1856 	 * initial SYN frame in their delayed-ACK and congestion control
1857 	 * algorithms that we must have the following bandaid to talk
1858 	 * efficiently to them.  -DaveM
1859 	 */
1860 	tp->snd_cwnd = 2;
1861 
1862 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1863 	 * initialization of these values.
1864 	 */
1865 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
1866 	tp->snd_cwnd_clamp = ~0;
1867 	tp->mss_cache = 536;
1868 
1869 	tp->reordering = sysctl_tcp_reordering;
1870 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1871 
1872 	sk->sk_state = TCP_CLOSE;
1873 
1874 	sk->sk_write_space = sk_stream_write_space;
1875 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1876 
1877 	icsk->icsk_af_ops = &ipv4_specific;
1878 	icsk->icsk_sync_mss = tcp_sync_mss;
1879 #ifdef CONFIG_TCP_MD5SIG
1880 	tp->af_specific = &tcp_sock_ipv4_specific;
1881 #endif
1882 
1883 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1884 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1885 
1886 	atomic_inc(&tcp_sockets_allocated);
1887 
1888 	return 0;
1889 }
1890 
1891 int tcp_v4_destroy_sock(struct sock *sk)
1892 {
1893 	struct tcp_sock *tp = tcp_sk(sk);
1894 
1895 	tcp_clear_xmit_timers(sk);
1896 
1897 	tcp_cleanup_congestion_control(sk);
1898 
1899 	/* Clean up the write buffer. */
1900 	tcp_write_queue_purge(sk);
1901 
1902 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1903 	__skb_queue_purge(&tp->out_of_order_queue);
1904 
1905 #ifdef CONFIG_TCP_MD5SIG
1906 	/* Clean up the MD5 key list, if any */
1907 	if (tp->md5sig_info) {
1908 		tcp_v4_clear_md5_list(sk);
1909 		kfree(tp->md5sig_info);
1910 		tp->md5sig_info = NULL;
1911 	}
1912 #endif
1913 
1914 #ifdef CONFIG_NET_DMA
1915 	/* Cleans up our sk_async_wait_queue */
1916 	__skb_queue_purge(&sk->sk_async_wait_queue);
1917 #endif
1918 
1919 	/* Clean up the prequeue; it should already be empty. */
1920 	__skb_queue_purge(&tp->ucopy.prequeue);
1921 
1922 	/* Clean up a referenced TCP bind bucket. */
1923 	if (inet_csk(sk)->icsk_bind_hash)
1924 		inet_put_port(&tcp_hashinfo, sk);
1925 
1926 	/*
1927 	 * If sendmsg cached page exists, toss it.
1928 	 */
1929 	if (sk->sk_sndmsg_page) {
1930 		__free_page(sk->sk_sndmsg_page);
1931 		sk->sk_sndmsg_page = NULL;
1932 	}
1933 
1934 	atomic_dec(&tcp_sockets_allocated);
1935 
1936 	return 0;
1937 }
1938 
1939 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1940 
1941 #ifdef CONFIG_PROC_FS
1942 /* Proc filesystem TCP sock list dumping. */
1943 
1944 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1945 {
1946 	return hlist_empty(head) ? NULL :
1947 		hlist_entry(head->first, struct inet_timewait_sock, tw_node);
1948 }
1949 
1950 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1951 {
1952 	return tw->tw_node.next ?
1953 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1954 }
1955 
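/*
 * Walk the listening hash.  st->bucket indexes listening_hash[], and while
 * a listener has pending open requests st->sbucket indexes its syn_table[],
 * with syn_wait_lock held for reading.  Only sockets and requests matching
 * st->family are returned.
 */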
1956 static void *listening_get_next(struct seq_file *seq, void *cur)
1957 {
1958 	struct inet_connection_sock *icsk;
1959 	struct hlist_node *node;
1960 	struct sock *sk = cur;
1961 	struct tcp_iter_state* st = seq->private;
1962 
1963 	if (!sk) {
1964 		st->bucket = 0;
1965 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1966 		goto get_sk;
1967 	}
1968 
1969 	++st->num;
1970 
1971 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1972 		struct request_sock *req = cur;
1973 
1974 		icsk = inet_csk(st->syn_wait_sk);
1975 		req = req->dl_next;
1976 		while (1) {
1977 			while (req) {
1978 				if (req->rsk_ops->family == st->family) {
1979 					cur = req;
1980 					goto out;
1981 				}
1982 				req = req->dl_next;
1983 			}
1984 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1985 				break;
1986 get_req:
1987 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1988 		}
1989 		sk	  = sk_next(st->syn_wait_sk);
1990 		st->state = TCP_SEQ_STATE_LISTENING;
1991 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1992 	} else {
1993 		icsk = inet_csk(sk);
1994 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1995 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1996 			goto start_req;
1997 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1998 		sk = sk_next(sk);
1999 	}
2000 get_sk:
2001 	sk_for_each_from(sk, node) {
2002 		if (sk->sk_family == st->family) {
2003 			cur = sk;
2004 			goto out;
2005 		}
2006 		icsk = inet_csk(sk);
2007 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2008 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2009 start_req:
2010 			st->uid		= sock_i_uid(sk);
2011 			st->syn_wait_sk = sk;
2012 			st->state	= TCP_SEQ_STATE_OPENREQ;
2013 			st->sbucket	= 0;
2014 			goto get_req;
2015 		}
2016 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2017 	}
2018 	if (++st->bucket < INET_LHTABLE_SIZE) {
2019 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2020 		goto get_sk;
2021 	}
2022 	cur = NULL;
2023 out:
2024 	return cur;
2025 }
2026 
2027 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2028 {
2029 	void *rc = listening_get_next(seq, NULL);
2030 
2031 	while (rc && *pos) {
2032 		rc = listening_get_next(seq, rc);
2033 		--*pos;
2034 	}
2035 	return rc;
2036 }
2037 
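/*
 * Find the first established or TIME_WAIT socket of the requested family.
 * The matching ehash bucket's read lock is left held for the caller; it is
 * dropped by established_get_next() or tcp_seq_stop() as the walk moves on.
 */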
2038 static void *established_get_first(struct seq_file *seq)
2039 {
2040 	struct tcp_iter_state* st = seq->private;
2041 	void *rc = NULL;
2042 
2043 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2044 		struct sock *sk;
2045 		struct hlist_node *node;
2046 		struct inet_timewait_sock *tw;
2047 
2048 		/* We can reschedule _before_ having picked the target: */
2049 		cond_resched_softirq();
2050 
2051 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2052 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2053 			if (sk->sk_family != st->family) {
2054 				continue;
2055 			}
2056 			rc = sk;
2057 			goto out;
2058 		}
2059 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2060 		inet_twsk_for_each(tw, node,
2061 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2062 			if (tw->tw_family != st->family) {
2063 				continue;
2064 			}
2065 			rc = tw;
2066 			goto out;
2067 		}
2068 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2069 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2070 	}
2071 out:
2072 	return rc;
2073 }
2074 
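/*
 * Advance within the current ehash bucket, falling back from the established
 * chain to the bucket's TIME_WAIT chain (twchain) before moving on to the
 * next bucket; the per-bucket lock is swapped as buckets change.
 */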
2075 static void *established_get_next(struct seq_file *seq, void *cur)
2076 {
2077 	struct sock *sk = cur;
2078 	struct inet_timewait_sock *tw;
2079 	struct hlist_node *node;
2080 	struct tcp_iter_state* st = seq->private;
2081 
2082 	++st->num;
2083 
2084 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2085 		tw = cur;
2086 		tw = tw_next(tw);
2087 get_tw:
2088 		while (tw && tw->tw_family != st->family) {
2089 			tw = tw_next(tw);
2090 		}
2091 		if (tw) {
2092 			cur = tw;
2093 			goto out;
2094 		}
2095 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2096 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2097 
2098 		/* We can reschedule between buckets: */
2099 		cond_resched_softirq();
2100 
2101 		if (++st->bucket < tcp_hashinfo.ehash_size) {
2102 			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2103 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2104 		} else {
2105 			cur = NULL;
2106 			goto out;
2107 		}
2108 	} else
2109 		sk = sk_next(sk);
2110 
2111 	sk_for_each_from(sk, node) {
2112 		if (sk->sk_family == st->family)
2113 			goto found;
2114 	}
2115 
2116 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2117 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2118 	goto get_tw;
2119 found:
2120 	cur = sk;
2121 out:
2122 	return cur;
2123 }
2124 
2125 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2126 {
2127 	void *rc = established_get_first(seq);
2128 
2129 	while (rc && pos) {
2130 		rc = established_get_next(seq, rc);
2131 		--pos;
2132 	}
2133 	return rc;
2134 }
2135 
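/*
 * Position the iterator at entry 'pos'.  The listening walk runs under
 * inet_listen_lock(); if the position falls beyond it, the listen lock is
 * released and the established walk proceeds with bottom halves disabled.
 */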
2136 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2137 {
2138 	void *rc;
2139 	struct tcp_iter_state* st = seq->private;
2140 
2141 	inet_listen_lock(&tcp_hashinfo);
2142 	st->state = TCP_SEQ_STATE_LISTENING;
2143 	rc	  = listening_get_idx(seq, &pos);
2144 
2145 	if (!rc) {
2146 		inet_listen_unlock(&tcp_hashinfo);
2147 		local_bh_disable();
2148 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2149 		rc	  = established_get_idx(seq, pos);
2150 	}
2151 
2152 	return rc;
2153 }
2154 
2155 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2156 {
2157 	struct tcp_iter_state* st = seq->private;
2158 	st->state = TCP_SEQ_STATE_LISTENING;
2159 	st->num = 0;
2160 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2161 }
2162 
2163 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2164 {
2165 	void *rc = NULL;
2166 	struct tcp_iter_state* st;
2167 
2168 	if (v == SEQ_START_TOKEN) {
2169 		rc = tcp_get_idx(seq, 0);
2170 		goto out;
2171 	}
2172 	st = seq->private;
2173 
2174 	switch (st->state) {
2175 	case TCP_SEQ_STATE_OPENREQ:
2176 	case TCP_SEQ_STATE_LISTENING:
2177 		rc = listening_get_next(seq, v);
2178 		if (!rc) {
2179 			inet_listen_unlock(&tcp_hashinfo);
2180 			local_bh_disable();
2181 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2182 			rc	  = established_get_first(seq);
2183 		}
2184 		break;
2185 	case TCP_SEQ_STATE_ESTABLISHED:
2186 	case TCP_SEQ_STATE_TIME_WAIT:
2187 		rc = established_get_next(seq, v);
2188 		break;
2189 	}
2190 out:
2191 	++*pos;
2192 	return rc;
2193 }
2194 
2195 static void tcp_seq_stop(struct seq_file *seq, void *v)
2196 {
2197 	struct tcp_iter_state* st = seq->private;
2198 
2199 	switch (st->state) {
2200 	case TCP_SEQ_STATE_OPENREQ:
2201 		if (v) {
2202 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2203 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2204 		}
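		/* Fall through: the listen lock is held in this state too. */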
2205 	case TCP_SEQ_STATE_LISTENING:
2206 		if (v != SEQ_START_TOKEN)
2207 			inet_listen_unlock(&tcp_hashinfo);
2208 		break;
2209 	case TCP_SEQ_STATE_TIME_WAIT:
2210 	case TCP_SEQ_STATE_ESTABLISHED:
2211 		if (v)
2212 			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2213 		local_bh_enable();
2214 		break;
2215 	}
2216 }
2217 
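/*
 * Each open of the /proc file gets its own tcp_iter_state, which embeds a
 * private copy of the seq_operations filled in from the per-family afinfo.
 */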
2218 static int tcp_seq_open(struct inode *inode, struct file *file)
2219 {
2220 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2221 	struct seq_file *seq;
2222 	struct tcp_iter_state *s;
2223 	int rc;
2224 
2225 	if (unlikely(afinfo == NULL))
2226 		return -EINVAL;
2227 
2228 	s = kzalloc(sizeof(*s), GFP_KERNEL);
2229 	if (!s)
2230 		return -ENOMEM;
2231 	s->family		= afinfo->family;
2232 	s->seq_ops.start	= tcp_seq_start;
2233 	s->seq_ops.next		= tcp_seq_next;
2234 	s->seq_ops.show		= afinfo->seq_show;
2235 	s->seq_ops.stop		= tcp_seq_stop;
2236 
2237 	rc = seq_open(file, &s->seq_ops);
2238 	if (rc)
2239 		goto out_kfree;
2240 	seq	     = file->private_data;
2241 	seq->private = s;
2242 out:
2243 	return rc;
2244 out_kfree:
2245 	kfree(s);
2246 	goto out;
2247 }
2248 
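/*
 * Fill in the caller-supplied file_operations and create the /proc/net entry;
 * the afinfo pointer is stashed in ->data for tcp_seq_open() to retrieve.
 */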
2249 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2250 {
2251 	int rc = 0;
2252 	struct proc_dir_entry *p;
2253 
2254 	if (!afinfo)
2255 		return -EINVAL;
2256 	afinfo->seq_fops->owner		= afinfo->owner;
2257 	afinfo->seq_fops->open		= tcp_seq_open;
2258 	afinfo->seq_fops->read		= seq_read;
2259 	afinfo->seq_fops->llseek	= seq_lseek;
2260 	afinfo->seq_fops->release	= seq_release_private;
2261 
2262 	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2263 	if (p)
2264 		p->data = afinfo;
2265 	else
2266 		rc = -ENOMEM;
2267 	return rc;
2268 }
2269 
2270 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2271 {
2272 	if (!afinfo)
2273 		return;
2274 	proc_net_remove(afinfo->name);
2275 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2276 }
2277 
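/*
 * The three helpers below format one line of /proc/net/tcp output for,
 * respectively, a pending open request, a full socket and a TIME_WAIT
 * socket, in the fixed-width layout announced by tcp4_seq_show().
 */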
2278 static void get_openreq4(struct sock *sk, struct request_sock *req,
2279 			 char *tmpbuf, int i, int uid)
2280 {
2281 	const struct inet_request_sock *ireq = inet_rsk(req);
2282 	int ttd = req->expires - jiffies;
2283 
2284 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2285 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2286 		i,
2287 		ireq->loc_addr,
2288 		ntohs(inet_sk(sk)->sport),
2289 		ireq->rmt_addr,
2290 		ntohs(ireq->rmt_port),
2291 		TCP_SYN_RECV,
2292 		0, 0, /* could print option size, but that is af dependent. */
2293 		1,    /* timers active (only the expire timer) */
2294 		jiffies_to_clock_t(ttd),
2295 		req->retrans,
2296 		uid,
2297 		0,  /* non-standard timer */
2298 		0, /* open_requests have no inode */
2299 		atomic_read(&sk->sk_refcnt),
2300 		req);
2301 }
2302 
2303 static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
2304 {
2305 	int timer_active;
2306 	unsigned long timer_expires;
2307 	struct tcp_sock *tp = tcp_sk(sk);
2308 	const struct inet_connection_sock *icsk = inet_csk(sk);
2309 	struct inet_sock *inet = inet_sk(sk);
2310 	__be32 dest = inet->daddr;
2311 	__be32 src = inet->rcv_saddr;
2312 	__u16 destp = ntohs(inet->dport);
2313 	__u16 srcp = ntohs(inet->sport);
2314 
2315 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2316 		timer_active	= 1;
2317 		timer_expires	= icsk->icsk_timeout;
2318 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2319 		timer_active	= 4;
2320 		timer_expires	= icsk->icsk_timeout;
2321 	} else if (timer_pending(&sk->sk_timer)) {
2322 		timer_active	= 2;
2323 		timer_expires	= sk->sk_timer.expires;
2324 	} else {
2325 		timer_active	= 0;
2326 		timer_expires	= jiffies;
2327 	}
2328 
2329 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2330 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
2331 		i, src, srcp, dest, destp, sk->sk_state,
2332 		tp->write_seq - tp->snd_una,
2333 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2334 					     (tp->rcv_nxt - tp->copied_seq),
2335 		timer_active,
2336 		jiffies_to_clock_t(timer_expires - jiffies),
2337 		icsk->icsk_retransmits,
2338 		sock_i_uid(sk),
2339 		icsk->icsk_probes_out,
2340 		sock_i_ino(sk),
2341 		atomic_read(&sk->sk_refcnt), sk,
2342 		icsk->icsk_rto,
2343 		icsk->icsk_ack.ato,
2344 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2345 		tp->snd_cwnd,
2346 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2347 }
2348 
2349 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2350 			       char *tmpbuf, int i)
2351 {
2352 	__be32 dest, src;
2353 	__u16 destp, srcp;
2354 	int ttd = tw->tw_ttd - jiffies;
2355 
2356 	if (ttd < 0)
2357 		ttd = 0;
2358 
2359 	dest  = tw->tw_daddr;
2360 	src   = tw->tw_rcv_saddr;
2361 	destp = ntohs(tw->tw_dport);
2362 	srcp  = ntohs(tw->tw_sport);
2363 
2364 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2365 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2366 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2367 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2368 		atomic_read(&tw->tw_refcnt), tw);
2369 }
2370 
2371 #define TMPSZ 150
2372 
2373 static int tcp4_seq_show(struct seq_file *seq, void *v)
2374 {
2375 	struct tcp_iter_state* st;
2376 	char tmpbuf[TMPSZ + 1];
2377 
2378 	if (v == SEQ_START_TOKEN) {
2379 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2380 			   "  sl  local_address rem_address   st tx_queue "
2381 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2382 			   "inode");
2383 		goto out;
2384 	}
2385 	st = seq->private;
2386 
2387 	switch (st->state) {
2388 	case TCP_SEQ_STATE_LISTENING:
2389 	case TCP_SEQ_STATE_ESTABLISHED:
2390 		get_tcp4_sock(v, tmpbuf, st->num);
2391 		break;
2392 	case TCP_SEQ_STATE_OPENREQ:
2393 		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2394 		break;
2395 	case TCP_SEQ_STATE_TIME_WAIT:
2396 		get_timewait4_sock(v, tmpbuf, st->num);
2397 		break;
2398 	}
2399 	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2400 out:
2401 	return 0;
2402 }
2403 
2404 static struct file_operations tcp4_seq_fops;
2405 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2406 	.owner		= THIS_MODULE,
2407 	.name		= "tcp",
2408 	.family		= AF_INET,
2409 	.seq_show	= tcp4_seq_show,
2410 	.seq_fops	= &tcp4_seq_fops,
2411 };
2412 
2413 int __init tcp4_proc_init(void)
2414 {
2415 	return tcp_proc_register(&tcp4_seq_afinfo);
2416 }
2417 
2418 void tcp4_proc_exit(void)
2419 {
2420 	tcp_proc_unregister(&tcp4_seq_afinfo);
2421 }
2422 #endif /* CONFIG_PROC_FS */
2423 
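/* Protocol hooks exported to the core socket layer for IPv4 TCP. */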
2424 struct proto tcp_prot = {
2425 	.name			= "TCP",
2426 	.owner			= THIS_MODULE,
2427 	.close			= tcp_close,
2428 	.connect		= tcp_v4_connect,
2429 	.disconnect		= tcp_disconnect,
2430 	.accept			= inet_csk_accept,
2431 	.ioctl			= tcp_ioctl,
2432 	.init			= tcp_v4_init_sock,
2433 	.destroy		= tcp_v4_destroy_sock,
2434 	.shutdown		= tcp_shutdown,
2435 	.setsockopt		= tcp_setsockopt,
2436 	.getsockopt		= tcp_getsockopt,
2437 	.sendmsg		= tcp_sendmsg,
2438 	.recvmsg		= tcp_recvmsg,
2439 	.backlog_rcv		= tcp_v4_do_rcv,
2440 	.hash			= tcp_v4_hash,
2441 	.unhash			= tcp_unhash,
2442 	.get_port		= tcp_v4_get_port,
2443 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2444 	.sockets_allocated	= &tcp_sockets_allocated,
2445 	.orphan_count		= &tcp_orphan_count,
2446 	.memory_allocated	= &tcp_memory_allocated,
2447 	.memory_pressure	= &tcp_memory_pressure,
2448 	.sysctl_mem		= sysctl_tcp_mem,
2449 	.sysctl_wmem		= sysctl_tcp_wmem,
2450 	.sysctl_rmem		= sysctl_tcp_rmem,
2451 	.max_header		= MAX_TCP_HEADER,
2452 	.obj_size		= sizeof(struct tcp_sock),
2453 	.twsk_prot		= &tcp_timewait_sock_ops,
2454 	.rsk_prot		= &tcp_request_sock_ops,
2455 #ifdef CONFIG_COMPAT
2456 	.compat_setsockopt	= compat_tcp_setsockopt,
2457 	.compat_getsockopt	= compat_tcp_getsockopt,
2458 #endif
2459 };
2460 
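/*
 * Create the kernel-internal control socket used when TCP itself must send
 * packets (e.g. resets); boot cannot continue without it, hence the panic().
 */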
2461 void __init tcp_v4_init(struct net_proto_family *ops)
2462 {
2463 	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
2464 				     IPPROTO_TCP) < 0)
2465 		panic("Failed to create the TCP control socket.\n");
2466 }
2467 
2468 EXPORT_SYMBOL(ipv4_specific);
2469 EXPORT_SYMBOL(tcp_hashinfo);
2470 EXPORT_SYMBOL(tcp_prot);
2471 EXPORT_SYMBOL(tcp_unhash);
2472 EXPORT_SYMBOL(tcp_v4_conn_request);
2473 EXPORT_SYMBOL(tcp_v4_connect);
2474 EXPORT_SYMBOL(tcp_v4_do_rcv);
2475 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2476 EXPORT_SYMBOL(tcp_v4_send_check);
2477 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2478 
2479 #ifdef CONFIG_PROC_FS
2480 EXPORT_SYMBOL(tcp_proc_register);
2481 EXPORT_SYMBOL(tcp_proc_unregister);
2482 #endif
2483 EXPORT_SYMBOL(sysctl_local_port_range);
2484 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2485 
2486