xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision af9b4738574b46025de7ccbe75c7b24fd8914379)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after a
45  *					year in a coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support the IPV6_V6ONLY socket option,
49  *	Alexey Kuznetsov		which allows both IPv4 and IPv6 sockets
50  *					to bind to a single port at the same time.
51  */
52 
53 
54 #include <linux/bottom_half.h>
55 #include <linux/types.h>
56 #include <linux/fcntl.h>
57 #include <linux/module.h>
58 #include <linux/random.h>
59 #include <linux/cache.h>
60 #include <linux/jhash.h>
61 #include <linux/init.h>
62 #include <linux/times.h>
63 #include <linux/slab.h>
64 
65 #include <net/net_namespace.h>
66 #include <net/icmp.h>
67 #include <net/inet_hashtables.h>
68 #include <net/tcp.h>
69 #include <net/transp_v6.h>
70 #include <net/ipv6.h>
71 #include <net/inet_common.h>
72 #include <net/timewait_sock.h>
73 #include <net/xfrm.h>
74 #include <net/netdma.h>
75 
76 #include <linux/inet.h>
77 #include <linux/ipv6.h>
78 #include <linux/stddef.h>
79 #include <linux/proc_fs.h>
80 #include <linux/seq_file.h>
81 
82 #include <linux/crypto.h>
83 #include <linux/scatterlist.h>
84 
85 int sysctl_tcp_tw_reuse __read_mostly;
86 int sysctl_tcp_low_latency __read_mostly;
87 
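/* Illustrative sketch (not part of the kernel build): the two knobs above
 * are exposed through sysctl as net.ipv4.tcp_tw_reuse and
 * net.ipv4.tcp_low_latency; a userspace tool could toggle tw_reuse by
 * writing the corresponding /proc file, roughly as below.
 */
#if 0
#include <stdio.h>

static int set_tcp_tw_reuse(int enable)
{
	/* Equivalent to: sysctl -w net.ipv4.tcp_tw_reuse=<enable> */
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", enable);
	return fclose(f);
}
#endif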
88 
89 #ifdef CONFIG_TCP_MD5SIG
90 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
91 						   __be32 addr);
92 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
93 			       __be32 daddr, __be32 saddr, struct tcphdr *th);
94 #else
95 static inline
96 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
97 {
98 	return NULL;
99 }
100 #endif
101 
102 struct inet_hashinfo tcp_hashinfo;
103 
104 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
105 {
106 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
107 					  ip_hdr(skb)->saddr,
108 					  tcp_hdr(skb)->dest,
109 					  tcp_hdr(skb)->source);
110 }
111 
112 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
113 {
114 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
115 	struct tcp_sock *tp = tcp_sk(sk);
116 
117 	/* With PAWS, it is safe from the viewpoint
118 	   of data integrity. Even without PAWS it is safe provided the sequence
119 	   spaces do not overlap, i.e. at data rates <= 80Mbit/sec.
120 
121 	   Actually, the idea is close to VJ's: only the timestamp cache is
122 	   held not per host but per port pair, and the TW bucket is used as
123 	   the state holder.
124 
125 	   If the TW bucket has already been destroyed we fall back to VJ's
126 	   scheme and use the initial timestamp retrieved from the peer table.
127 	 */
128 	if (tcptw->tw_ts_recent_stamp &&
129 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
130 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
131 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
132 		if (tp->write_seq == 0)
133 			tp->write_seq = 1;
134 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
135 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
136 		sock_hold(sktw);
137 		return 1;
138 	}
139 
140 	return 0;
141 }
142 
143 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
144 
145 /* This will initiate an outgoing connection. */
146 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
147 {
148 	struct inet_sock *inet = inet_sk(sk);
149 	struct tcp_sock *tp = tcp_sk(sk);
150 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
151 	struct rtable *rt;
152 	__be32 daddr, nexthop;
153 	int tmp;
154 	int err;
155 
156 	if (addr_len < sizeof(struct sockaddr_in))
157 		return -EINVAL;
158 
159 	if (usin->sin_family != AF_INET)
160 		return -EAFNOSUPPORT;
161 
162 	nexthop = daddr = usin->sin_addr.s_addr;
163 	if (inet->opt && inet->opt->srr) {
164 		if (!daddr)
165 			return -EINVAL;
166 		nexthop = inet->opt->faddr;
167 	}
168 
169 	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
170 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
171 			       IPPROTO_TCP,
172 			       inet->inet_sport, usin->sin_port, sk, 1);
173 	if (tmp < 0) {
174 		if (tmp == -ENETUNREACH)
175 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
176 		return tmp;
177 	}
178 
179 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
180 		ip_rt_put(rt);
181 		return -ENETUNREACH;
182 	}
183 
184 	if (!inet->opt || !inet->opt->srr)
185 		daddr = rt->rt_dst;
186 
187 	if (!inet->inet_saddr)
188 		inet->inet_saddr = rt->rt_src;
189 	inet->inet_rcv_saddr = inet->inet_saddr;
190 
191 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
192 		/* Reset inherited state */
193 		tp->rx_opt.ts_recent	   = 0;
194 		tp->rx_opt.ts_recent_stamp = 0;
195 		tp->write_seq		   = 0;
196 	}
197 
198 	if (tcp_death_row.sysctl_tw_recycle &&
199 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
200 		struct inet_peer *peer = rt_get_peer(rt);
201 		/*
202 		 * VJ's idea. We save the last timestamp seen from
203 		 * the destination in the peer table when entering
204 		 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
205 		 * when trying a new connection.
206 		 */
207 		if (peer != NULL &&
208 		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
209 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
210 			tp->rx_opt.ts_recent = peer->tcp_ts;
211 		}
212 	}
213 
214 	inet->inet_dport = usin->sin_port;
215 	inet->inet_daddr = daddr;
216 
217 	inet_csk(sk)->icsk_ext_hdr_len = 0;
218 	if (inet->opt)
219 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
220 
221 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
222 
223 	/* Socket identity is still unknown (sport may be zero).
224 	 * However we set the state to SYN-SENT and, without releasing the
225 	 * socket lock, select a source port, enter ourselves into the hash
226 	 * tables and complete initialization after this.
227 	 */
228 	tcp_set_state(sk, TCP_SYN_SENT);
229 	err = inet_hash_connect(&tcp_death_row, sk);
230 	if (err)
231 		goto failure;
232 
233 	err = ip_route_newports(&rt, IPPROTO_TCP,
234 				inet->inet_sport, inet->inet_dport, sk);
235 	if (err)
236 		goto failure;
237 
238 	/* OK, now commit destination to socket.  */
239 	sk->sk_gso_type = SKB_GSO_TCPV4;
240 	sk_setup_caps(sk, &rt->u.dst);
241 
242 	if (!tp->write_seq)
243 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
244 							   inet->inet_daddr,
245 							   inet->inet_sport,
246 							   usin->sin_port);
247 
248 	inet->inet_id = tp->write_seq ^ jiffies;
249 
250 	err = tcp_connect(sk);
251 	rt = NULL;
252 	if (err)
253 		goto failure;
254 
255 	return 0;
256 
257 failure:
258 	/*
259 	 * This unhashes the socket and releases the local port,
260 	 * if necessary.
261 	 */
262 	tcp_set_state(sk, TCP_CLOSE);
263 	ip_rt_put(rt);
264 	sk->sk_route_caps = 0;
265 	inet->inet_dport = 0;
266 	return err;
267 }
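/* Illustrative sketch (userspace, not part of the kernel build): the path
 * above is reached when an application connects an AF_INET stream socket;
 * the address and port below are placeholders, not values from this file.
 */
#if 0
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

static int connect_example(void)
{
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(80),	/* placeholder port */
	};
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);	/* placeholder addr */
	/* For a TCP/IPv4 socket, connect() ends up in tcp_v4_connect(). */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
#endif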
268 
269 /*
270  * This routine does path mtu discovery as defined in RFC1191.
271  */
272 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
273 {
274 	struct dst_entry *dst;
275 	struct inet_sock *inet = inet_sk(sk);
276 
277 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
278 	 * sent out by Linux are always < 576 bytes, so they should go through
279 	 * unfragmented).
280 	 */
281 	if (sk->sk_state == TCP_LISTEN)
282 		return;
283 
284 	/* We don't check in the dst entry if pmtu discovery is forbidden
285 	 * on this route. We just assume that no packet-too-big packets
286 	 * are sent back when pmtu discovery is not active.
287 	 * There is a small race when the user changes this flag in the
288 	 * route, but I think that's acceptable.
289 	 */
290 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
291 		return;
292 
293 	dst->ops->update_pmtu(dst, mtu);
294 
295 	/* Something is about to go wrong... Remember the soft error
296 	 * in case this connection is not able to recover.
297 	 */
298 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
299 		sk->sk_err_soft = EMSGSIZE;
300 
301 	mtu = dst_mtu(dst);
302 
303 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
304 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
305 		tcp_sync_mss(sk, mtu);
306 
307 		/* Resend the TCP packet because it's
308 		 * clear that the old packet has been
309 		 * dropped. This is the new "fast" path mtu
310 		 * discovery.
311 		 */
312 		tcp_simple_retransmit(sk);
313 	} /* else let the usual retransmit timer handle it */
314 }
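/* Illustrative sketch (userspace, not part of the kernel build): the
 * inet->pmtudisc value consulted above is normally set per socket with
 * the IP_MTU_DISCOVER option, e.g. IP_PMTUDISC_DO to request DF-based
 * path MTU discovery or IP_PMTUDISC_DONT to turn it off.
 */
#if 0
#include <netinet/in.h>
#include <sys/socket.h>

static int request_pmtu_discovery(int fd)
{
	int val = IP_PMTUDISC_DO;	/* set DF and rely on ICMP FRAG_NEEDED */

	return setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));
}
#endif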
315 
316 /*
317  * This routine is called by the ICMP module when it gets some
318  * sort of error condition.  If err < 0 then the socket should
319  * be closed and the error returned to the user.  If err > 0
320  * it's just the ICMP type << 8 | ICMP code.  After adjustment the
321  * header points to the first 8 bytes of the TCP header.  We need
322  * to find the appropriate port.
323  *
324  * The locking strategy used here is very "optimistic". When
325  * someone else accesses the socket the ICMP is just dropped
326  * and for some paths there is no check at all.
327  * A more general error queue to queue errors for later handling
328  * is probably better.
329  *
330  */
331 
332 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
333 {
334 	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
335 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
336 	struct inet_connection_sock *icsk;
337 	struct tcp_sock *tp;
338 	struct inet_sock *inet;
339 	const int type = icmp_hdr(icmp_skb)->type;
340 	const int code = icmp_hdr(icmp_skb)->code;
341 	struct sock *sk;
342 	struct sk_buff *skb;
343 	__u32 seq;
344 	__u32 remaining;
345 	int err;
346 	struct net *net = dev_net(icmp_skb->dev);
347 
348 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
349 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
350 		return;
351 	}
352 
353 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
354 			iph->saddr, th->source, inet_iif(icmp_skb));
355 	if (!sk) {
356 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
357 		return;
358 	}
359 	if (sk->sk_state == TCP_TIME_WAIT) {
360 		inet_twsk_put(inet_twsk(sk));
361 		return;
362 	}
363 
364 	bh_lock_sock(sk);
365 	/* If too many ICMPs get dropped on busy
366 	 * servers this needs to be solved differently.
367 	 */
368 	if (sock_owned_by_user(sk))
369 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
370 
371 	if (sk->sk_state == TCP_CLOSE)
372 		goto out;
373 
374 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
375 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
376 		goto out;
377 	}
378 
379 	icsk = inet_csk(sk);
380 	tp = tcp_sk(sk);
381 	seq = ntohl(th->seq);
382 	if (sk->sk_state != TCP_LISTEN &&
383 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
384 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
385 		goto out;
386 	}
387 
388 	switch (type) {
389 	case ICMP_SOURCE_QUENCH:
390 		/* Just silently ignore these. */
391 		goto out;
392 	case ICMP_PARAMETERPROB:
393 		err = EPROTO;
394 		break;
395 	case ICMP_DEST_UNREACH:
396 		if (code > NR_ICMP_UNREACH)
397 			goto out;
398 
399 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
400 			if (!sock_owned_by_user(sk))
401 				do_pmtu_discovery(sk, iph, info);
402 			goto out;
403 		}
404 
405 		err = icmp_err_convert[code].errno;
406 		/* check if icmp_skb allows revert of backoff
407 		 * (see draft-zimmermann-tcp-lcd) */
408 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
409 			break;
410 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
411 		    !icsk->icsk_backoff)
412 			break;
413 
414 		icsk->icsk_backoff--;
415 		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
416 					 icsk->icsk_backoff;
417 		tcp_bound_rto(sk);
418 
419 		skb = tcp_write_queue_head(sk);
420 		BUG_ON(!skb);
421 
422 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
423 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
424 
425 		if (remaining) {
426 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
427 						  remaining, TCP_RTO_MAX);
428 		} else if (sock_owned_by_user(sk)) {
429 			/* RTO revert clocked out retransmission,
430 			 * but socket is locked. Will defer. */
431 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
432 						  HZ/20, TCP_RTO_MAX);
433 		} else {
434 			/* RTO revert clocked out retransmission.
435 			 * Will retransmit now */
436 			tcp_retransmit_timer(sk);
437 		}
438 
439 		break;
440 	case ICMP_TIME_EXCEEDED:
441 		err = EHOSTUNREACH;
442 		break;
443 	default:
444 		goto out;
445 	}
446 
447 	switch (sk->sk_state) {
448 		struct request_sock *req, **prev;
449 	case TCP_LISTEN:
450 		if (sock_owned_by_user(sk))
451 			goto out;
452 
453 		req = inet_csk_search_req(sk, &prev, th->dest,
454 					  iph->daddr, iph->saddr);
455 		if (!req)
456 			goto out;
457 
458 		/* ICMPs are not backlogged, hence we cannot get
459 		   an established socket here.
460 		 */
461 		WARN_ON(req->sk);
462 
463 		if (seq != tcp_rsk(req)->snt_isn) {
464 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
465 			goto out;
466 		}
467 
468 		/*
469 		 * Still in SYN_RECV, just remove it silently.
470 		 * There is no good way to pass the error to the newly
471 		 * created socket, and POSIX does not want network
472 		 * errors returned from accept().
473 		 */
474 		inet_csk_reqsk_queue_drop(sk, req, prev);
475 		goto out;
476 
477 	case TCP_SYN_SENT:
478 	case TCP_SYN_RECV:  /* Cannot happen.
479 			       It can, for example, happen if SYNs crossed.
480 			     */
481 		if (!sock_owned_by_user(sk)) {
482 			sk->sk_err = err;
483 
484 			sk->sk_error_report(sk);
485 
486 			tcp_done(sk);
487 		} else {
488 			sk->sk_err_soft = err;
489 		}
490 		goto out;
491 	}
492 
493 	/* If we've already connected we will keep trying
494 	 * until we time out, or the user gives up.
495 	 *
496 	 * RFC 1122 4.2.3.9 allows us to consider only PROTO_UNREACH and
497 	 * PORT_UNREACH as hard errors (well, FRAG_FAILED too,
498 	 * but it is obsoleted by pmtu discovery).
499 	 *
500 	 * Note that on the modern internet, where routing is unreliable
501 	 * and broken firewalls sit in every dark corner sending random
502 	 * errors ordered by their masters, even these two messages have
503 	 * finally lost their original sense (even Linux sends invalid PORT_UNREACHs).
504 	 *
505 	 * Now we are in compliance with RFCs.
506 	 *							--ANK (980905)
507 	 */
508 
509 	inet = inet_sk(sk);
510 	if (!sock_owned_by_user(sk) && inet->recverr) {
511 		sk->sk_err = err;
512 		sk->sk_error_report(sk);
513 	} else	{ /* Only an error on timeout */
514 		sk->sk_err_soft = err;
515 	}
516 
517 out:
518 	bh_unlock_sock(sk);
519 	sock_put(sk);
520 }
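/* Illustrative sketch (userspace, not part of the kernel build): the
 * inet->recverr test above corresponds to the IP_RECVERR socket option;
 * enabling it lets an application see these ICMP-derived errors instead
 * of only getting a soft error on timeout.
 */
#if 0
#include <netinet/in.h>
#include <sys/socket.h>

static int enable_icmp_error_reporting(int fd)
{
	int on = 1;

	return setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on));
}
#endif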
521 
522 static void __tcp_v4_send_check(struct sk_buff *skb,
523 				__be32 saddr, __be32 daddr)
524 {
525 	struct tcphdr *th = tcp_hdr(skb);
526 
527 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
528 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
529 		skb->csum_start = skb_transport_header(skb) - skb->head;
530 		skb->csum_offset = offsetof(struct tcphdr, check);
531 	} else {
532 		th->check = tcp_v4_check(skb->len, saddr, daddr,
533 					 csum_partial(th,
534 						      th->doff << 2,
535 						      skb->csum));
536 	}
537 }
538 
539 /* This routine computes an IPv4 TCP checksum. */
540 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
541 {
542 	struct inet_sock *inet = inet_sk(sk);
543 
544 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
545 }
546 
547 int tcp_v4_gso_send_check(struct sk_buff *skb)
548 {
549 	const struct iphdr *iph;
550 	struct tcphdr *th;
551 
552 	if (!pskb_may_pull(skb, sizeof(*th)))
553 		return -EINVAL;
554 
555 	iph = ip_hdr(skb);
556 	th = tcp_hdr(skb);
557 
558 	th->check = 0;
559 	skb->ip_summed = CHECKSUM_PARTIAL;
560 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
561 	return 0;
562 }
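/* Illustrative sketch (not part of the kernel build): what the helpers
 * above delegate to csum_partial()/tcp_v4_check() is the standard 16-bit
 * ones'-complement Internet checksum over the pseudo-header and segment,
 * folded roughly like this.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

static uint16_t inet_checksum(const void *data, size_t len, uint32_t sum)
{
	const uint8_t *p = data;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];	/* big-endian 16-bit words */
		p += 2;
		len -= 2;
	}
	if (len)				/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}
#endif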
563 
564 /*
565  *	This routine will send an RST to the other TCP.
566  *
567  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
568  *		      for the reset?
569  *	Answer: if a packet caused an RST, it is not for a socket
570  *		existing in our system; if it is matched to a socket,
571  *		it is just a duplicate segment or a bug in the other side's TCP.
572  *		So we build the reply based only on the parameters that
573  *		arrived with the segment.
574  *	Exception: precedence violation. We do not implement it in any case.
575  */
576 
577 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
578 {
579 	struct tcphdr *th = tcp_hdr(skb);
580 	struct {
581 		struct tcphdr th;
582 #ifdef CONFIG_TCP_MD5SIG
583 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
584 #endif
585 	} rep;
586 	struct ip_reply_arg arg;
587 #ifdef CONFIG_TCP_MD5SIG
588 	struct tcp_md5sig_key *key;
589 #endif
590 	struct net *net;
591 
592 	/* Never send a reset in response to a reset. */
593 	if (th->rst)
594 		return;
595 
596 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
597 		return;
598 
599 	/* Swap the send and the receive. */
600 	memset(&rep, 0, sizeof(rep));
601 	rep.th.dest   = th->source;
602 	rep.th.source = th->dest;
603 	rep.th.doff   = sizeof(struct tcphdr) / 4;
604 	rep.th.rst    = 1;
605 
606 	if (th->ack) {
607 		rep.th.seq = th->ack_seq;
608 	} else {
609 		rep.th.ack = 1;
610 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
611 				       skb->len - (th->doff << 2));
612 	}
613 
614 	memset(&arg, 0, sizeof(arg));
615 	arg.iov[0].iov_base = (unsigned char *)&rep;
616 	arg.iov[0].iov_len  = sizeof(rep.th);
617 
618 #ifdef CONFIG_TCP_MD5SIG
619 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
620 	if (key) {
621 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
622 				   (TCPOPT_NOP << 16) |
623 				   (TCPOPT_MD5SIG << 8) |
624 				   TCPOLEN_MD5SIG);
625 		/* Update length and the length the header thinks exists */
626 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
627 		rep.th.doff = arg.iov[0].iov_len / 4;
628 
629 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
630 				     key, ip_hdr(skb)->saddr,
631 				     ip_hdr(skb)->daddr, &rep.th);
632 	}
633 #endif
634 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
635 				      ip_hdr(skb)->saddr, /* XXX */
636 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
637 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
638 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
639 
640 	net = dev_net(skb_dst(skb)->dev);
641 	ip_send_reply(net->ipv4.tcp_sock, skb,
642 		      &arg, arg.iov[0].iov_len);
643 
644 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
645 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
646 }
647 
648 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
649    outside of socket context, is certainly ugly. What can I do?
650  */
651 
652 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
653 			    u32 win, u32 ts, int oif,
654 			    struct tcp_md5sig_key *key,
655 			    int reply_flags)
656 {
657 	struct tcphdr *th = tcp_hdr(skb);
658 	struct {
659 		struct tcphdr th;
660 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
661 #ifdef CONFIG_TCP_MD5SIG
662 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
663 #endif
664 			];
665 	} rep;
666 	struct ip_reply_arg arg;
667 	struct net *net = dev_net(skb_dst(skb)->dev);
668 
669 	memset(&rep.th, 0, sizeof(struct tcphdr));
670 	memset(&arg, 0, sizeof(arg));
671 
672 	arg.iov[0].iov_base = (unsigned char *)&rep;
673 	arg.iov[0].iov_len  = sizeof(rep.th);
674 	if (ts) {
675 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
676 				   (TCPOPT_TIMESTAMP << 8) |
677 				   TCPOLEN_TIMESTAMP);
678 		rep.opt[1] = htonl(tcp_time_stamp);
679 		rep.opt[2] = htonl(ts);
680 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
681 	}
682 
683 	/* Swap the send and the receive. */
684 	rep.th.dest    = th->source;
685 	rep.th.source  = th->dest;
686 	rep.th.doff    = arg.iov[0].iov_len / 4;
687 	rep.th.seq     = htonl(seq);
688 	rep.th.ack_seq = htonl(ack);
689 	rep.th.ack     = 1;
690 	rep.th.window  = htons(win);
691 
692 #ifdef CONFIG_TCP_MD5SIG
693 	if (key) {
694 		int offset = (ts) ? 3 : 0;
695 
696 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
697 					  (TCPOPT_NOP << 16) |
698 					  (TCPOPT_MD5SIG << 8) |
699 					  TCPOLEN_MD5SIG);
700 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
701 		rep.th.doff = arg.iov[0].iov_len/4;
702 
703 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
704 				    key, ip_hdr(skb)->saddr,
705 				    ip_hdr(skb)->daddr, &rep.th);
706 	}
707 #endif
708 	arg.flags = reply_flags;
709 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
710 				      ip_hdr(skb)->saddr, /* XXX */
711 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
712 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
713 	if (oif)
714 		arg.bound_dev_if = oif;
715 
716 	ip_send_reply(net->ipv4.tcp_sock, skb,
717 		      &arg, arg.iov[0].iov_len);
718 
719 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
720 }
721 
722 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
723 {
724 	struct inet_timewait_sock *tw = inet_twsk(sk);
725 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
726 
727 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
728 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
729 			tcptw->tw_ts_recent,
730 			tw->tw_bound_dev_if,
731 			tcp_twsk_md5_key(tcptw),
732 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
733 			);
734 
735 	inet_twsk_put(tw);
736 }
737 
738 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
739 				  struct request_sock *req)
740 {
741 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
742 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
743 			req->ts_recent,
744 			0,
745 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
746 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
747 }
748 
749 /*
750  *	Send a SYN-ACK after having received a SYN.
751  *	This still operates on a request_sock only, not on a big
752  *	socket.
753  */
754 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
755 			      struct request_sock *req,
756 			      struct request_values *rvp)
757 {
758 	const struct inet_request_sock *ireq = inet_rsk(req);
759 	int err = -1;
760 	struct sk_buff * skb;
761 
762 	/* First, grab a route. */
763 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
764 		return -1;
765 
766 	skb = tcp_make_synack(sk, dst, req, rvp);
767 
768 	if (skb) {
769 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
770 
771 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
772 					    ireq->rmt_addr,
773 					    ireq->opt);
774 		err = net_xmit_eval(err);
775 	}
776 
777 	dst_release(dst);
778 	return err;
779 }
780 
781 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
782 			      struct request_values *rvp)
783 {
784 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
785 	return tcp_v4_send_synack(sk, NULL, req, rvp);
786 }
787 
788 /*
789  *	IPv4 request_sock destructor.
790  */
791 static void tcp_v4_reqsk_destructor(struct request_sock *req)
792 {
793 	kfree(inet_rsk(req)->opt);
794 }
795 
796 static void syn_flood_warning(const struct sk_buff *skb)
797 {
798 	const char *msg;
799 
800 #ifdef CONFIG_SYN_COOKIES
801 	if (sysctl_tcp_syncookies)
802 		msg = "Sending cookies";
803 	else
804 #endif
805 		msg = "Dropping request";
806 
807 	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
808 				ntohs(tcp_hdr(skb)->dest), msg);
809 }
810 
811 /*
812  * Save and compile IPv4 options into the request_sock if needed.
813  */
814 static struct ip_options *tcp_v4_save_options(struct sock *sk,
815 					      struct sk_buff *skb)
816 {
817 	struct ip_options *opt = &(IPCB(skb)->opt);
818 	struct ip_options *dopt = NULL;
819 
820 	if (opt && opt->optlen) {
821 		int opt_size = optlength(opt);
822 		dopt = kmalloc(opt_size, GFP_ATOMIC);
823 		if (dopt) {
824 			if (ip_options_echo(dopt, skb)) {
825 				kfree(dopt);
826 				dopt = NULL;
827 			}
828 		}
829 	}
830 	return dopt;
831 }
832 
833 #ifdef CONFIG_TCP_MD5SIG
834 /*
835  * RFC2385 MD5 checksumming requires a mapping of
836  * IP address->MD5 Key.
837  * We need to maintain these in the sk structure.
838  */
839 
840 /* Find the Key structure for an address.  */
841 static struct tcp_md5sig_key *
842 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
843 {
844 	struct tcp_sock *tp = tcp_sk(sk);
845 	int i;
846 
847 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
848 		return NULL;
849 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
850 		if (tp->md5sig_info->keys4[i].addr == addr)
851 			return &tp->md5sig_info->keys4[i].base;
852 	}
853 	return NULL;
854 }
855 
856 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
857 					 struct sock *addr_sk)
858 {
859 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
860 }
861 
862 EXPORT_SYMBOL(tcp_v4_md5_lookup);
863 
864 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
865 						      struct request_sock *req)
866 {
867 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
868 }
869 
870 /* This can be called on a newly created socket, from other files */
871 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
872 		      u8 *newkey, u8 newkeylen)
873 {
874 	/* Add Key to the list */
875 	struct tcp_md5sig_key *key;
876 	struct tcp_sock *tp = tcp_sk(sk);
877 	struct tcp4_md5sig_key *keys;
878 
879 	key = tcp_v4_md5_do_lookup(sk, addr);
880 	if (key) {
881 		/* Pre-existing entry - just update that one. */
882 		kfree(key->key);
883 		key->key = newkey;
884 		key->keylen = newkeylen;
885 	} else {
886 		struct tcp_md5sig_info *md5sig;
887 
888 		if (!tp->md5sig_info) {
889 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
890 						  GFP_ATOMIC);
891 			if (!tp->md5sig_info) {
892 				kfree(newkey);
893 				return -ENOMEM;
894 			}
895 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
896 		}
897 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
898 			kfree(newkey);
899 			return -ENOMEM;
900 		}
901 		md5sig = tp->md5sig_info;
902 
903 		if (md5sig->alloced4 == md5sig->entries4) {
904 			keys = kmalloc((sizeof(*keys) *
905 					(md5sig->entries4 + 1)), GFP_ATOMIC);
906 			if (!keys) {
907 				kfree(newkey);
908 				tcp_free_md5sig_pool();
909 				return -ENOMEM;
910 			}
911 
912 			if (md5sig->entries4)
913 				memcpy(keys, md5sig->keys4,
914 				       sizeof(*keys) * md5sig->entries4);
915 
916 			/* Free old key list, and reference new one */
917 			kfree(md5sig->keys4);
918 			md5sig->keys4 = keys;
919 			md5sig->alloced4++;
920 		}
921 		md5sig->entries4++;
922 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
923 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
924 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
925 	}
926 	return 0;
927 }
928 
929 EXPORT_SYMBOL(tcp_v4_md5_do_add);
930 
931 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
932 			       u8 *newkey, u8 newkeylen)
933 {
934 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
935 				 newkey, newkeylen);
936 }
937 
938 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
939 {
940 	struct tcp_sock *tp = tcp_sk(sk);
941 	int i;
942 
943 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
944 		if (tp->md5sig_info->keys4[i].addr == addr) {
945 			/* Free the key */
946 			kfree(tp->md5sig_info->keys4[i].base.key);
947 			tp->md5sig_info->entries4--;
948 
949 			if (tp->md5sig_info->entries4 == 0) {
950 				kfree(tp->md5sig_info->keys4);
951 				tp->md5sig_info->keys4 = NULL;
952 				tp->md5sig_info->alloced4 = 0;
953 			} else if (tp->md5sig_info->entries4 != i) {
954 				/* Need to do some manipulation */
955 				memmove(&tp->md5sig_info->keys4[i],
956 					&tp->md5sig_info->keys4[i+1],
957 					(tp->md5sig_info->entries4 - i) *
958 					 sizeof(struct tcp4_md5sig_key));
959 			}
960 			tcp_free_md5sig_pool();
961 			return 0;
962 		}
963 	}
964 	return -ENOENT;
965 }
966 
967 EXPORT_SYMBOL(tcp_v4_md5_do_del);
968 
969 static void tcp_v4_clear_md5_list(struct sock *sk)
970 {
971 	struct tcp_sock *tp = tcp_sk(sk);
972 
973 	/* Free each key, then the array of keys,
974 	 * the crypto element, and then decrement our
975 	 * hold on the last-resort crypto pool.
976 	 */
977 	if (tp->md5sig_info->entries4) {
978 		int i;
979 		for (i = 0; i < tp->md5sig_info->entries4; i++)
980 			kfree(tp->md5sig_info->keys4[i].base.key);
981 		tp->md5sig_info->entries4 = 0;
982 		tcp_free_md5sig_pool();
983 	}
984 	if (tp->md5sig_info->keys4) {
985 		kfree(tp->md5sig_info->keys4);
986 		tp->md5sig_info->keys4 = NULL;
987 		tp->md5sig_info->alloced4  = 0;
988 	}
989 }
990 
991 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
992 				 int optlen)
993 {
994 	struct tcp_md5sig cmd;
995 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
996 	u8 *newkey;
997 
998 	if (optlen < sizeof(cmd))
999 		return -EINVAL;
1000 
1001 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1002 		return -EFAULT;
1003 
1004 	if (sin->sin_family != AF_INET)
1005 		return -EINVAL;
1006 
1007 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1008 		if (!tcp_sk(sk)->md5sig_info)
1009 			return -ENOENT;
1010 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1011 	}
1012 
1013 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1014 		return -EINVAL;
1015 
1016 	if (!tcp_sk(sk)->md5sig_info) {
1017 		struct tcp_sock *tp = tcp_sk(sk);
1018 		struct tcp_md5sig_info *p;
1019 
1020 		p = kzalloc(sizeof(*p), sk->sk_allocation);
1021 		if (!p)
1022 			return -EINVAL;
1023 
1024 		tp->md5sig_info = p;
1025 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1026 	}
1027 
1028 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1029 	if (!newkey)
1030 		return -ENOMEM;
1031 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1032 				 newkey, cmd.tcpm_keylen);
1033 }
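/* Illustrative sketch (userspace, not part of the kernel build): the
 * struct tcp_md5sig parsed above is filled in by applications (typically
 * BGP daemons) through the TCP_MD5SIG socket option, roughly like this;
 * the peer address and key are the caller's own values.
 */
#if 0
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/tcp.h>		/* struct tcp_md5sig, TCP_MD5SIG */

static int set_md5_key(int fd, const struct sockaddr_in *peer,
		       const void *key, int keylen)
{
	struct tcp_md5sig md5 = { .tcpm_keylen = keylen };

	memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
	memcpy(md5.tcpm_key, key, keylen);	/* keylen <= TCP_MD5SIG_MAXKEYLEN */
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}
#endif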
1034 
1035 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1036 					__be32 daddr, __be32 saddr, int nbytes)
1037 {
1038 	struct tcp4_pseudohdr *bp;
1039 	struct scatterlist sg;
1040 
1041 	bp = &hp->md5_blk.ip4;
1042 
1043 	/*
1044 	 * 1. the TCP pseudo-header (in the order: source IP address,
1045 	 * destination IP address, zero-padded protocol number, and
1046 	 * segment length)
1047 	 */
1048 	bp->saddr = saddr;
1049 	bp->daddr = daddr;
1050 	bp->pad = 0;
1051 	bp->protocol = IPPROTO_TCP;
1052 	bp->len = cpu_to_be16(nbytes);
1053 
1054 	sg_init_one(&sg, bp, sizeof(*bp));
1055 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1056 }
1057 
1058 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1059 			       __be32 daddr, __be32 saddr, struct tcphdr *th)
1060 {
1061 	struct tcp_md5sig_pool *hp;
1062 	struct hash_desc *desc;
1063 
1064 	hp = tcp_get_md5sig_pool();
1065 	if (!hp)
1066 		goto clear_hash_noput;
1067 	desc = &hp->md5_desc;
1068 
1069 	if (crypto_hash_init(desc))
1070 		goto clear_hash;
1071 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1072 		goto clear_hash;
1073 	if (tcp_md5_hash_header(hp, th))
1074 		goto clear_hash;
1075 	if (tcp_md5_hash_key(hp, key))
1076 		goto clear_hash;
1077 	if (crypto_hash_final(desc, md5_hash))
1078 		goto clear_hash;
1079 
1080 	tcp_put_md5sig_pool();
1081 	return 0;
1082 
1083 clear_hash:
1084 	tcp_put_md5sig_pool();
1085 clear_hash_noput:
1086 	memset(md5_hash, 0, 16);
1087 	return 1;
1088 }
1089 
1090 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1091 			struct sock *sk, struct request_sock *req,
1092 			struct sk_buff *skb)
1093 {
1094 	struct tcp_md5sig_pool *hp;
1095 	struct hash_desc *desc;
1096 	struct tcphdr *th = tcp_hdr(skb);
1097 	__be32 saddr, daddr;
1098 
1099 	if (sk) {
1100 		saddr = inet_sk(sk)->inet_saddr;
1101 		daddr = inet_sk(sk)->inet_daddr;
1102 	} else if (req) {
1103 		saddr = inet_rsk(req)->loc_addr;
1104 		daddr = inet_rsk(req)->rmt_addr;
1105 	} else {
1106 		const struct iphdr *iph = ip_hdr(skb);
1107 		saddr = iph->saddr;
1108 		daddr = iph->daddr;
1109 	}
1110 
1111 	hp = tcp_get_md5sig_pool();
1112 	if (!hp)
1113 		goto clear_hash_noput;
1114 	desc = &hp->md5_desc;
1115 
1116 	if (crypto_hash_init(desc))
1117 		goto clear_hash;
1118 
1119 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1120 		goto clear_hash;
1121 	if (tcp_md5_hash_header(hp, th))
1122 		goto clear_hash;
1123 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1124 		goto clear_hash;
1125 	if (tcp_md5_hash_key(hp, key))
1126 		goto clear_hash;
1127 	if (crypto_hash_final(desc, md5_hash))
1128 		goto clear_hash;
1129 
1130 	tcp_put_md5sig_pool();
1131 	return 0;
1132 
1133 clear_hash:
1134 	tcp_put_md5sig_pool();
1135 clear_hash_noput:
1136 	memset(md5_hash, 0, 16);
1137 	return 1;
1138 }
1139 
1140 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1141 
1142 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1143 {
1144 	/*
1145 	 * This gets called for each TCP segment that arrives,
1146 	 * so we want to be efficient.
1147 	 * We have 3 drop cases:
1148 	 * o No MD5 hash and one expected.
1149 	 * o MD5 hash and we're not expecting one.
1150 	 * o MD5 hash and it's wrong.
1151 	 */
1152 	__u8 *hash_location = NULL;
1153 	struct tcp_md5sig_key *hash_expected;
1154 	const struct iphdr *iph = ip_hdr(skb);
1155 	struct tcphdr *th = tcp_hdr(skb);
1156 	int genhash;
1157 	unsigned char newhash[16];
1158 
1159 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1160 	hash_location = tcp_parse_md5sig_option(th);
1161 
1162 	/* We've parsed the options - do we have a hash? */
1163 	if (!hash_expected && !hash_location)
1164 		return 0;
1165 
1166 	if (hash_expected && !hash_location) {
1167 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1168 		return 1;
1169 	}
1170 
1171 	if (!hash_expected && hash_location) {
1172 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1173 		return 1;
1174 	}
1175 
1176 	/* Okay, so we have both hash_expected and hash_location,
1177 	 * so we need to calculate the hash.
1178 	 */
1179 	genhash = tcp_v4_md5_hash_skb(newhash,
1180 				      hash_expected,
1181 				      NULL, NULL, skb);
1182 
1183 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1184 		if (net_ratelimit()) {
1185 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1186 			       &iph->saddr, ntohs(th->source),
1187 			       &iph->daddr, ntohs(th->dest),
1188 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1189 		}
1190 		return 1;
1191 	}
1192 	return 0;
1193 }
1194 
1195 #endif
1196 
1197 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1198 	.family		=	PF_INET,
1199 	.obj_size	=	sizeof(struct tcp_request_sock),
1200 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1201 	.send_ack	=	tcp_v4_reqsk_send_ack,
1202 	.destructor	=	tcp_v4_reqsk_destructor,
1203 	.send_reset	=	tcp_v4_send_reset,
1204 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1205 };
1206 
1207 #ifdef CONFIG_TCP_MD5SIG
1208 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1209 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1210 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1211 };
1212 #endif
1213 
1214 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1215 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1216 	.twsk_unique	= tcp_twsk_unique,
1217 	.twsk_destructor= tcp_twsk_destructor,
1218 };
1219 
1220 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1221 {
1222 	struct tcp_extend_values tmp_ext;
1223 	struct tcp_options_received tmp_opt;
1224 	u8 *hash_location;
1225 	struct request_sock *req;
1226 	struct inet_request_sock *ireq;
1227 	struct tcp_sock *tp = tcp_sk(sk);
1228 	struct dst_entry *dst = NULL;
1229 	__be32 saddr = ip_hdr(skb)->saddr;
1230 	__be32 daddr = ip_hdr(skb)->daddr;
1231 	__u32 isn = TCP_SKB_CB(skb)->when;
1232 #ifdef CONFIG_SYN_COOKIES
1233 	int want_cookie = 0;
1234 #else
1235 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1236 #endif
1237 
1238 	/* Never answer SYNs sent to broadcast or multicast addresses */
1239 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1240 		goto drop;
1241 
1242 	/* TW buckets are converted to open requests without
1243 	 * limitation; they conserve resources and the peer is
1244 	 * evidently a real one.
1245 	 */
1246 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1247 		if (net_ratelimit())
1248 			syn_flood_warning(skb);
1249 #ifdef CONFIG_SYN_COOKIES
1250 		if (sysctl_tcp_syncookies) {
1251 			want_cookie = 1;
1252 		} else
1253 #endif
1254 		goto drop;
1255 	}
1256 
1257 	/* The accept backlog is full. If we have already queued enough
1258 	 * warm entries in the syn queue, drop the request. That is better than
1259 	 * clogging the syn queue with openreqs that have exponentially
1260 	 * increasing timeouts.
1261 	 */
1262 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1263 		goto drop;
1264 
1265 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1266 	if (!req)
1267 		goto drop;
1268 
1269 #ifdef CONFIG_TCP_MD5SIG
1270 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1271 #endif
1272 
1273 	tcp_clear_options(&tmp_opt);
1274 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1275 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1276 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1277 
1278 	if (tmp_opt.cookie_plus > 0 &&
1279 	    tmp_opt.saw_tstamp &&
1280 	    !tp->rx_opt.cookie_out_never &&
1281 	    (sysctl_tcp_cookie_size > 0 ||
1282 	     (tp->cookie_values != NULL &&
1283 	      tp->cookie_values->cookie_desired > 0))) {
1284 		u8 *c;
1285 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1286 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1287 
1288 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1289 			goto drop_and_release;
1290 
1291 		/* Secret recipe starts with IP addresses */
1292 		*mess++ ^= (__force u32)daddr;
1293 		*mess++ ^= (__force u32)saddr;
1294 
1295 		/* plus variable length Initiator Cookie */
1296 		c = (u8 *)mess;
1297 		while (l-- > 0)
1298 			*c++ ^= *hash_location++;
1299 
1300 #ifdef CONFIG_SYN_COOKIES
1301 		want_cookie = 0;	/* not our kind of cookie */
1302 #endif
1303 		tmp_ext.cookie_out_never = 0; /* false */
1304 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1305 	} else if (!tp->rx_opt.cookie_in_always) {
1306 		/* redundant indications, but ensure initialization. */
1307 		tmp_ext.cookie_out_never = 1; /* true */
1308 		tmp_ext.cookie_plus = 0;
1309 	} else {
1310 		goto drop_and_release;
1311 	}
1312 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1313 
1314 	if (want_cookie && !tmp_opt.saw_tstamp)
1315 		tcp_clear_options(&tmp_opt);
1316 
1317 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1318 	tcp_openreq_init(req, &tmp_opt, skb);
1319 
1320 	ireq = inet_rsk(req);
1321 	ireq->loc_addr = daddr;
1322 	ireq->rmt_addr = saddr;
1323 	ireq->no_srccheck = inet_sk(sk)->transparent;
1324 	ireq->opt = tcp_v4_save_options(sk, skb);
1325 
1326 	if (security_inet_conn_request(sk, skb, req))
1327 		goto drop_and_free;
1328 
1329 	if (!want_cookie)
1330 		TCP_ECN_create_request(req, tcp_hdr(skb));
1331 
1332 	if (want_cookie) {
1333 #ifdef CONFIG_SYN_COOKIES
1334 		req->cookie_ts = tmp_opt.tstamp_ok;
1335 #endif
1336 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1337 	} else if (!isn) {
1338 		struct inet_peer *peer = NULL;
1339 
1340 		/* VJ's idea. We save the last timestamp seen
1341 		 * from the destination in the peer table when entering
1342 		 * TIME-WAIT state, and check against it before
1343 		 * accepting a new connection request.
1344 		 *
1345 		 * If "isn" is not zero, this request hit a live
1346 		 * timewait bucket, so all the necessary checks
1347 		 * were made in the function processing the timewait state.
1348 		 */
1349 		if (tmp_opt.saw_tstamp &&
1350 		    tcp_death_row.sysctl_tw_recycle &&
1351 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1352 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1353 		    peer->v4daddr == saddr) {
1354 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1355 			    (s32)(peer->tcp_ts - req->ts_recent) >
1356 							TCP_PAWS_WINDOW) {
1357 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1358 				goto drop_and_release;
1359 			}
1360 		}
1361 		/* Kill the following clause, if you dislike this way. */
1362 		else if (!sysctl_tcp_syncookies &&
1363 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1364 			  (sysctl_max_syn_backlog >> 2)) &&
1365 			 (!peer || !peer->tcp_ts_stamp) &&
1366 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1367 			/* Without syncookies, the last quarter of the
1368 			 * backlog is reserved for destinations
1369 			 * proven to be alive.
1370 			 * It means that we continue to communicate only
1371 			 * with destinations already remembered
1372 			 * at the moment of the synflood.
1373 			 */
1374 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1375 				       &saddr, ntohs(tcp_hdr(skb)->source));
1376 			goto drop_and_release;
1377 		}
1378 
1379 		isn = tcp_v4_init_sequence(skb);
1380 	}
1381 	tcp_rsk(req)->snt_isn = isn;
1382 
1383 	if (tcp_v4_send_synack(sk, dst, req,
1384 			       (struct request_values *)&tmp_ext) ||
1385 	    want_cookie)
1386 		goto drop_and_free;
1387 
1388 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1389 	return 0;
1390 
1391 drop_and_release:
1392 	dst_release(dst);
1393 drop_and_free:
1394 	reqsk_free(req);
1395 drop:
1396 	return 0;
1397 }
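/* Illustrative sketch (userspace, not part of the kernel build): the
 * inet_sk(sk)->transparent flag copied into no_srccheck above is set with
 * the IP_TRANSPARENT socket option (CAP_NET_ADMIN required), which
 * transparent proxies use to work with non-local addresses.
 */
#if 0
#include <netinet/in.h>
#include <sys/socket.h>
#ifndef IP_TRANSPARENT
#define IP_TRANSPARENT 19	/* from linux/in.h, in case libc lacks it */
#endif

static int make_socket_transparent(int fd)
{
	int on = 1;

	return setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on));
}
#endif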
1398 
1399 
1400 /*
1401  * The three way handshake has completed - we got a valid synack -
1402  * now create the new socket.
1403  */
1404 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1405 				  struct request_sock *req,
1406 				  struct dst_entry *dst)
1407 {
1408 	struct inet_request_sock *ireq;
1409 	struct inet_sock *newinet;
1410 	struct tcp_sock *newtp;
1411 	struct sock *newsk;
1412 #ifdef CONFIG_TCP_MD5SIG
1413 	struct tcp_md5sig_key *key;
1414 #endif
1415 
1416 	if (sk_acceptq_is_full(sk))
1417 		goto exit_overflow;
1418 
1419 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1420 		goto exit;
1421 
1422 	newsk = tcp_create_openreq_child(sk, req, skb);
1423 	if (!newsk)
1424 		goto exit;
1425 
1426 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1427 	sk_setup_caps(newsk, dst);
1428 
1429 	newtp		      = tcp_sk(newsk);
1430 	newinet		      = inet_sk(newsk);
1431 	ireq		      = inet_rsk(req);
1432 	newinet->inet_daddr   = ireq->rmt_addr;
1433 	newinet->inet_rcv_saddr = ireq->loc_addr;
1434 	newinet->inet_saddr	      = ireq->loc_addr;
1435 	newinet->opt	      = ireq->opt;
1436 	ireq->opt	      = NULL;
1437 	newinet->mc_index     = inet_iif(skb);
1438 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1439 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1440 	if (newinet->opt)
1441 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1442 	newinet->inet_id = newtp->write_seq ^ jiffies;
1443 
1444 	tcp_mtup_init(newsk);
1445 	tcp_sync_mss(newsk, dst_mtu(dst));
1446 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1447 	if (tcp_sk(sk)->rx_opt.user_mss &&
1448 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1449 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1450 
1451 	tcp_initialize_rcv_mss(newsk);
1452 
1453 #ifdef CONFIG_TCP_MD5SIG
1454 	/* Copy over the MD5 key from the original socket */
1455 	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1456 	if (key != NULL) {
1457 		/*
1458 		 * We're using one, so create a matching key
1459 		 * on the newsk structure. If we fail to get
1460 		 * memory, then we end up not copying the key
1461 		 * across. Shucks.
1462 		 */
1463 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1464 		if (newkey != NULL)
1465 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1466 					  newkey, key->keylen);
1467 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1468 	}
1469 #endif
1470 
1471 	__inet_hash_nolisten(newsk, NULL);
1472 	__inet_inherit_port(sk, newsk);
1473 
1474 	return newsk;
1475 
1476 exit_overflow:
1477 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1478 exit:
1479 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1480 	dst_release(dst);
1481 	return NULL;
1482 }
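/* Illustrative sketch (userspace, not part of the kernel build): the
 * request/child handling above is driven by an ordinary passive open;
 * the port below is a placeholder.
 */
#if 0
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

static int passive_open_example(void)
{
	struct sockaddr_in addr = {
		.sin_family      = AF_INET,
		.sin_port        = htons(8080),		/* placeholder port */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 128) < 0) {
		close(fd);
		return -1;
	}
	/* Sockets created by tcp_v4_syn_recv_sock() are picked up here. */
	return accept(fd, NULL, NULL);
}
#endif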
1483 
1484 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1485 {
1486 	struct tcphdr *th = tcp_hdr(skb);
1487 	const struct iphdr *iph = ip_hdr(skb);
1488 	struct sock *nsk;
1489 	struct request_sock **prev;
1490 	/* Find possible connection requests. */
1491 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1492 						       iph->saddr, iph->daddr);
1493 	if (req)
1494 		return tcp_check_req(sk, skb, req, prev);
1495 
1496 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1497 			th->source, iph->daddr, th->dest, inet_iif(skb));
1498 
1499 	if (nsk) {
1500 		if (nsk->sk_state != TCP_TIME_WAIT) {
1501 			bh_lock_sock(nsk);
1502 			return nsk;
1503 		}
1504 		inet_twsk_put(inet_twsk(nsk));
1505 		return NULL;
1506 	}
1507 
1508 #ifdef CONFIG_SYN_COOKIES
1509 	if (!th->syn)
1510 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1511 #endif
1512 	return sk;
1513 }
1514 
1515 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1516 {
1517 	const struct iphdr *iph = ip_hdr(skb);
1518 
1519 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1520 		if (!tcp_v4_check(skb->len, iph->saddr,
1521 				  iph->daddr, skb->csum)) {
1522 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1523 			return 0;
1524 		}
1525 	}
1526 
1527 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1528 				       skb->len, IPPROTO_TCP, 0);
1529 
1530 	if (skb->len <= 76) {
1531 		return __skb_checksum_complete(skb);
1532 	}
1533 	return 0;
1534 }
1535 
1536 
1537 /* The socket must have its spinlock held when we get
1538  * here.
1539  *
1540  * We have a potential double-lock case here, so even when
1541  * doing backlog processing we use the BH locking scheme.
1542  * This is because we cannot sleep with the original spinlock
1543  * held.
1544  */
1545 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1546 {
1547 	struct sock *rsk;
1548 #ifdef CONFIG_TCP_MD5SIG
1549 	/*
1550 	 * We really want to reject the packet as early as possible
1551 	 * if:
1552 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1553 	 *  o There is an MD5 option and we're not expecting one
1554 	 */
1555 	if (tcp_v4_inbound_md5_hash(sk, skb))
1556 		goto discard;
1557 #endif
1558 
1559 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1560 		TCP_CHECK_TIMER(sk);
1561 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1562 			rsk = sk;
1563 			goto reset;
1564 		}
1565 		TCP_CHECK_TIMER(sk);
1566 		return 0;
1567 	}
1568 
1569 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1570 		goto csum_err;
1571 
1572 	if (sk->sk_state == TCP_LISTEN) {
1573 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1574 		if (!nsk)
1575 			goto discard;
1576 
1577 		if (nsk != sk) {
1578 			if (tcp_child_process(sk, nsk, skb)) {
1579 				rsk = nsk;
1580 				goto reset;
1581 			}
1582 			return 0;
1583 		}
1584 	}
1585 
1586 	TCP_CHECK_TIMER(sk);
1587 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1588 		rsk = sk;
1589 		goto reset;
1590 	}
1591 	TCP_CHECK_TIMER(sk);
1592 	return 0;
1593 
1594 reset:
1595 	tcp_v4_send_reset(rsk, skb);
1596 discard:
1597 	kfree_skb(skb);
1598 	/* Be careful here. If this function gets more complicated and
1599 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1600 	 * might be destroyed here. This current version compiles correctly,
1601 	 * but you have been warned.
1602 	 */
1603 	return 0;
1604 
1605 csum_err:
1606 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1607 	goto discard;
1608 }
1609 
1610 /*
1611  *	From tcp_input.c
1612  */
1613 
1614 int tcp_v4_rcv(struct sk_buff *skb)
1615 {
1616 	const struct iphdr *iph;
1617 	struct tcphdr *th;
1618 	struct sock *sk;
1619 	int ret;
1620 	struct net *net = dev_net(skb->dev);
1621 
1622 	if (skb->pkt_type != PACKET_HOST)
1623 		goto discard_it;
1624 
1625 	/* Count it even if it's bad */
1626 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1627 
1628 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1629 		goto discard_it;
1630 
1631 	th = tcp_hdr(skb);
1632 
1633 	if (th->doff < sizeof(struct tcphdr) / 4)
1634 		goto bad_packet;
1635 	if (!pskb_may_pull(skb, th->doff * 4))
1636 		goto discard_it;
1637 
1638 	/* An explanation is required here, I think.
1639 	 * Packet length and doff are validated by header prediction,
1640 	 * provided the case of th->doff == 0 is eliminated.
1641 	 * So, we defer the checks. */
1642 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1643 		goto bad_packet;
1644 
1645 	th = tcp_hdr(skb);
1646 	iph = ip_hdr(skb);
1647 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1648 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1649 				    skb->len - th->doff * 4);
1650 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1651 	TCP_SKB_CB(skb)->when	 = 0;
1652 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1653 	TCP_SKB_CB(skb)->sacked	 = 0;
1654 
1655 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1656 	if (!sk)
1657 		goto no_tcp_socket;
1658 
1659 process:
1660 	if (sk->sk_state == TCP_TIME_WAIT)
1661 		goto do_time_wait;
1662 
1663 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1664 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1665 		goto discard_and_relse;
1666 	}
1667 
1668 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1669 		goto discard_and_relse;
1670 	nf_reset(skb);
1671 
1672 	if (sk_filter(sk, skb))
1673 		goto discard_and_relse;
1674 
1675 	skb->dev = NULL;
1676 
1677 	sock_rps_save_rxhash(sk, skb->rxhash);
1678 
1679 	bh_lock_sock_nested(sk);
1680 	ret = 0;
1681 	if (!sock_owned_by_user(sk)) {
1682 #ifdef CONFIG_NET_DMA
1683 		struct tcp_sock *tp = tcp_sk(sk);
1684 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1685 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1686 		if (tp->ucopy.dma_chan)
1687 			ret = tcp_v4_do_rcv(sk, skb);
1688 		else
1689 #endif
1690 		{
1691 			if (!tcp_prequeue(sk, skb))
1692 				ret = tcp_v4_do_rcv(sk, skb);
1693 		}
1694 	} else if (unlikely(sk_add_backlog(sk, skb))) {
1695 		bh_unlock_sock(sk);
1696 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1697 		goto discard_and_relse;
1698 	}
1699 	bh_unlock_sock(sk);
1700 
1701 	sock_put(sk);
1702 
1703 	return ret;
1704 
1705 no_tcp_socket:
1706 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1707 		goto discard_it;
1708 
1709 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1710 bad_packet:
1711 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1712 	} else {
1713 		tcp_v4_send_reset(NULL, skb);
1714 	}
1715 
1716 discard_it:
1717 	/* Discard frame. */
1718 	kfree_skb(skb);
1719 	return 0;
1720 
1721 discard_and_relse:
1722 	sock_put(sk);
1723 	goto discard_it;
1724 
1725 do_time_wait:
1726 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1727 		inet_twsk_put(inet_twsk(sk));
1728 		goto discard_it;
1729 	}
1730 
1731 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1732 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1733 		inet_twsk_put(inet_twsk(sk));
1734 		goto discard_it;
1735 	}
1736 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1737 	case TCP_TW_SYN: {
1738 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1739 							&tcp_hashinfo,
1740 							iph->daddr, th->dest,
1741 							inet_iif(skb));
1742 		if (sk2) {
1743 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1744 			inet_twsk_put(inet_twsk(sk));
1745 			sk = sk2;
1746 			goto process;
1747 		}
1748 		/* Fall through to ACK */
1749 	}
1750 	case TCP_TW_ACK:
1751 		tcp_v4_timewait_ack(sk, skb);
1752 		break;
1753 	case TCP_TW_RST:
1754 		goto no_tcp_socket;
1755 	case TCP_TW_SUCCESS:;
1756 	}
1757 	goto discard_it;
1758 }
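/* Illustrative sketch (userspace, not part of the kernel build): the
 * min_ttl checks in tcp_v4_rcv() and tcp_v4_err() implement the RFC 5082
 * generalized TTL security mechanism; an application arms them with the
 * IP_MINTTL socket option, roughly as below.
 */
#if 0
#include <netinet/in.h>
#include <sys/socket.h>
#ifndef IP_MINTTL
#define IP_MINTTL 21	/* from linux/in.h, in case libc lacks it */
#endif

static int require_min_ttl(int fd, int min_ttl)
{
	/* e.g. min_ttl = 255 to accept only packets from direct neighbours */
	return setsockopt(fd, IPPROTO_IP, IP_MINTTL, &min_ttl, sizeof(min_ttl));
}
#endif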
1759 
1760 /* VJ's idea. Save the last timestamp seen from this destination
1761  * and hold it at least for the normal timewait interval, to use for
1762  * duplicate segment detection in subsequent connections before they
1763  * enter the synchronized state.
1764  */
1765 
1766 int tcp_v4_remember_stamp(struct sock *sk)
1767 {
1768 	struct inet_sock *inet = inet_sk(sk);
1769 	struct tcp_sock *tp = tcp_sk(sk);
1770 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1771 	struct inet_peer *peer = NULL;
1772 	int release_it = 0;
1773 
1774 	if (!rt || rt->rt_dst != inet->inet_daddr) {
1775 		peer = inet_getpeer(inet->inet_daddr, 1);
1776 		release_it = 1;
1777 	} else {
1778 		if (!rt->peer)
1779 			rt_bind_peer(rt, 1);
1780 		peer = rt->peer;
1781 	}
1782 
1783 	if (peer) {
1784 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1785 		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1786 		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
1787 			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
1788 			peer->tcp_ts = tp->rx_opt.ts_recent;
1789 		}
1790 		if (release_it)
1791 			inet_putpeer(peer);
1792 		return 1;
1793 	}
1794 
1795 	return 0;
1796 }
1797 
1798 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1799 {
1800 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1801 
1802 	if (peer) {
1803 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1804 
1805 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1806 		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1807 		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
1808 			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
1809 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1810 		}
1811 		inet_putpeer(peer);
1812 		return 1;
1813 	}
1814 
1815 	return 0;
1816 }
1817 
1818 const struct inet_connection_sock_af_ops ipv4_specific = {
1819 	.queue_xmit	   = ip_queue_xmit,
1820 	.send_check	   = tcp_v4_send_check,
1821 	.rebuild_header	   = inet_sk_rebuild_header,
1822 	.conn_request	   = tcp_v4_conn_request,
1823 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1824 	.remember_stamp	   = tcp_v4_remember_stamp,
1825 	.net_header_len	   = sizeof(struct iphdr),
1826 	.setsockopt	   = ip_setsockopt,
1827 	.getsockopt	   = ip_getsockopt,
1828 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1829 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1830 	.bind_conflict	   = inet_csk_bind_conflict,
1831 #ifdef CONFIG_COMPAT
1832 	.compat_setsockopt = compat_ip_setsockopt,
1833 	.compat_getsockopt = compat_ip_getsockopt,
1834 #endif
1835 };
1836 
1837 #ifdef CONFIG_TCP_MD5SIG
1838 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1839 	.md5_lookup		= tcp_v4_md5_lookup,
1840 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1841 	.md5_add		= tcp_v4_md5_add_func,
1842 	.md5_parse		= tcp_v4_parse_md5_keys,
1843 };
1844 #endif
1845 
1846 /* NOTE: A lot of things are set to zero explicitly by the call to
1847  *       sk_alloc(), so they need not be done here.
1848  */
1849 static int tcp_v4_init_sock(struct sock *sk)
1850 {
1851 	struct inet_connection_sock *icsk = inet_csk(sk);
1852 	struct tcp_sock *tp = tcp_sk(sk);
1853 
1854 	skb_queue_head_init(&tp->out_of_order_queue);
1855 	tcp_init_xmit_timers(sk);
1856 	tcp_prequeue_init(tp);
1857 
1858 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1859 	tp->mdev = TCP_TIMEOUT_INIT;
1860 
1861 	/* So many TCP implementations out there (incorrectly) count the
1862 	 * initial SYN frame in their delayed-ACK and congestion control
1863 	 * algorithms that we must have the following bandaid to talk
1864 	 * efficiently to them.  -DaveM
1865 	 */
1866 	tp->snd_cwnd = 2;
1867 
1868 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1869 	 * initialization of these values.
1870 	 */
1871 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1872 	tp->snd_cwnd_clamp = ~0;
1873 	tp->mss_cache = TCP_MSS_DEFAULT;
1874 
1875 	tp->reordering = sysctl_tcp_reordering;
1876 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1877 
1878 	sk->sk_state = TCP_CLOSE;
1879 
1880 	sk->sk_write_space = sk_stream_write_space;
1881 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1882 
1883 	icsk->icsk_af_ops = &ipv4_specific;
1884 	icsk->icsk_sync_mss = tcp_sync_mss;
1885 #ifdef CONFIG_TCP_MD5SIG
1886 	tp->af_specific = &tcp_sock_ipv4_specific;
1887 #endif
1888 
1889 	/* TCP Cookie Transactions */
1890 	if (sysctl_tcp_cookie_size > 0) {
1891 		/* Default, cookies without s_data_payload. */
1892 		tp->cookie_values =
1893 			kzalloc(sizeof(*tp->cookie_values),
1894 				sk->sk_allocation);
1895 		if (tp->cookie_values != NULL)
1896 			kref_init(&tp->cookie_values->kref);
1897 	}
1898 	/* Presumed zeroed, in order of appearance:
1899 	 *	cookie_in_always, cookie_out_never,
1900 	 *	s_data_constant, s_data_in, s_data_out
1901 	 */
1902 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1903 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1904 
1905 	local_bh_disable();
1906 	percpu_counter_inc(&tcp_sockets_allocated);
1907 	local_bh_enable();
1908 
1909 	return 0;
1910 }
1911 
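/* Undo what tcp_v4_init_sock() and the data path attached to this socket:
 * stop the timers, drop congestion control state, purge any queued skbs,
 * and release MD5 keys, the bound port, the cached sendmsg page and any
 * cookie values still referenced.
 */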
1912 void tcp_v4_destroy_sock(struct sock *sk)
1913 {
1914 	struct tcp_sock *tp = tcp_sk(sk);
1915 
1916 	tcp_clear_xmit_timers(sk);
1917 
1918 	tcp_cleanup_congestion_control(sk);
1919 
1920 	/* Clean up the write buffer. */
1921 	tcp_write_queue_purge(sk);
1922 
1923 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1924 	__skb_queue_purge(&tp->out_of_order_queue);
1925 
1926 #ifdef CONFIG_TCP_MD5SIG
1927 	/* Clean up the MD5 key list, if any */
1928 	if (tp->md5sig_info) {
1929 		tcp_v4_clear_md5_list(sk);
1930 		kfree(tp->md5sig_info);
1931 		tp->md5sig_info = NULL;
1932 	}
1933 #endif
1934 
1935 #ifdef CONFIG_NET_DMA
1936 	/* Cleans up our sk_async_wait_queue */
1937 	__skb_queue_purge(&sk->sk_async_wait_queue);
1938 #endif
1939 
1940 	/* Clean up the prequeue; it really should be empty by now. */
1941 	__skb_queue_purge(&tp->ucopy.prequeue);
1942 
1943 	/* Clean up a referenced TCP bind bucket. */
1944 	if (inet_csk(sk)->icsk_bind_hash)
1945 		inet_put_port(sk);
1946 
1947 	/*
1948 	 * If a cached sendmsg page exists, free it.
1949 	 */
1950 	if (sk->sk_sndmsg_page) {
1951 		__free_page(sk->sk_sndmsg_page);
1952 		sk->sk_sndmsg_page = NULL;
1953 	}
1954 
1955 	/* TCP Cookie Transactions */
1956 	if (tp->cookie_values != NULL) {
1957 		kref_put(&tp->cookie_values->kref,
1958 			 tcp_cookie_values_release);
1959 		tp->cookie_values = NULL;
1960 	}
1961 
1962 	percpu_counter_dec(&tcp_sockets_allocated);
1963 }
1964 
1965 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1966 
1967 #ifdef CONFIG_PROC_FS
1968 /* Proc filesystem TCP sock list dumping. */
1969 
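/* The seq_file iterator below walks three populations in order: listening
 * sockets (and, in TCP_SEQ_STATE_OPENREQ, their pending connection
 * requests), then established sockets, then TIME_WAIT sockets.  st->state
 * records which population we are in, st->bucket the current hash bucket
 * and st->sbucket the slot inside a listener's SYN table.
 */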
1970 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1971 {
1972 	return hlist_nulls_empty(head) ? NULL :
1973 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1974 }
1975 
1976 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1977 {
1978 	return !is_a_nulls(tw->tw_node.next) ?
1979 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1980 }
1981 
1982 static void *listening_get_next(struct seq_file *seq, void *cur)
1983 {
1984 	struct inet_connection_sock *icsk;
1985 	struct hlist_nulls_node *node;
1986 	struct sock *sk = cur;
1987 	struct inet_listen_hashbucket *ilb;
1988 	struct tcp_iter_state *st = seq->private;
1989 	struct net *net = seq_file_net(seq);
1990 
1991 	if (!sk) {
1992 		st->bucket = 0;
1993 		ilb = &tcp_hashinfo.listening_hash[0];
1994 		spin_lock_bh(&ilb->lock);
1995 		sk = sk_nulls_head(&ilb->head);
1996 		goto get_sk;
1997 	}
1998 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1999 	++st->num;
2000 
2001 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2002 		struct request_sock *req = cur;
2003 
2004 		icsk = inet_csk(st->syn_wait_sk);
2005 		req = req->dl_next;
2006 		while (1) {
2007 			while (req) {
2008 				if (req->rsk_ops->family == st->family) {
2009 					cur = req;
2010 					goto out;
2011 				}
2012 				req = req->dl_next;
2013 			}
2014 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2015 				break;
2016 get_req:
2017 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2018 		}
2019 		sk	  = sk_next(st->syn_wait_sk);
2020 		st->state = TCP_SEQ_STATE_LISTENING;
2021 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2022 	} else {
2023 		icsk = inet_csk(sk);
2024 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2025 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2026 			goto start_req;
2027 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2028 		sk = sk_next(sk);
2029 	}
2030 get_sk:
2031 	sk_nulls_for_each_from(sk, node) {
2032 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
2033 			cur = sk;
2034 			goto out;
2035 		}
2036 		icsk = inet_csk(sk);
2037 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2038 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2039 start_req:
2040 			st->uid		= sock_i_uid(sk);
2041 			st->syn_wait_sk = sk;
2042 			st->state	= TCP_SEQ_STATE_OPENREQ;
2043 			st->sbucket	= 0;
2044 			goto get_req;
2045 		}
2046 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2047 	}
2048 	spin_unlock_bh(&ilb->lock);
2049 	if (++st->bucket < INET_LHTABLE_SIZE) {
2050 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2051 		spin_lock_bh(&ilb->lock);
2052 		sk = sk_nulls_head(&ilb->head);
2053 		goto get_sk;
2054 	}
2055 	cur = NULL;
2056 out:
2057 	return cur;
2058 }
2059 
2060 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2061 {
2062 	void *rc = listening_get_next(seq, NULL);
2063 
2064 	while (rc && *pos) {
2065 		rc = listening_get_next(seq, rc);
2066 		--*pos;
2067 	}
2068 	return rc;
2069 }
2070 
2071 static inline int empty_bucket(struct tcp_iter_state *st)
2072 {
2073 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2074 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2075 }
2076 
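/* Scan the established hash table starting from bucket 0: established
 * sockets live on ->chain and TIME_WAIT sockets on ->twchain of the same
 * bucket.  The bucket lock is held while entries from that bucket are
 * being reported and is released in tcp_seq_stop() or when moving on.
 */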
2077 static void *established_get_first(struct seq_file *seq)
2078 {
2079 	struct tcp_iter_state *st = seq->private;
2080 	struct net *net = seq_file_net(seq);
2081 	void *rc = NULL;
2082 
2083 	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2084 		struct sock *sk;
2085 		struct hlist_nulls_node *node;
2086 		struct inet_timewait_sock *tw;
2087 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2088 
2089 		/* Lockless fast path for the common case of empty buckets */
2090 		if (empty_bucket(st))
2091 			continue;
2092 
2093 		spin_lock_bh(lock);
2094 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2095 			if (sk->sk_family != st->family ||
2096 			    !net_eq(sock_net(sk), net)) {
2097 				continue;
2098 			}
2099 			rc = sk;
2100 			goto out;
2101 		}
2102 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2103 		inet_twsk_for_each(tw, node,
2104 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2105 			if (tw->tw_family != st->family ||
2106 			    !net_eq(twsk_net(tw), net)) {
2107 				continue;
2108 			}
2109 			rc = tw;
2110 			goto out;
2111 		}
2112 		spin_unlock_bh(lock);
2113 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2114 	}
2115 out:
2116 	return rc;
2117 }
2118 
2119 static void *established_get_next(struct seq_file *seq, void *cur)
2120 {
2121 	struct sock *sk = cur;
2122 	struct inet_timewait_sock *tw;
2123 	struct hlist_nulls_node *node;
2124 	struct tcp_iter_state *st = seq->private;
2125 	struct net *net = seq_file_net(seq);
2126 
2127 	++st->num;
2128 
2129 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2130 		tw = cur;
2131 		tw = tw_next(tw);
2132 get_tw:
2133 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2134 			tw = tw_next(tw);
2135 		}
2136 		if (tw) {
2137 			cur = tw;
2138 			goto out;
2139 		}
2140 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2141 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2142 
2143 		/* Look for the next non-empty bucket */
2144 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2145 				empty_bucket(st))
2146 			;
2147 		if (st->bucket > tcp_hashinfo.ehash_mask)
2148 			return NULL;
2149 
2150 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2151 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2152 	} else
2153 		sk = sk_nulls_next(sk);
2154 
2155 	sk_nulls_for_each_from(sk, node) {
2156 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2157 			goto found;
2158 	}
2159 
2160 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2161 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2162 	goto get_tw;
2163 found:
2164 	cur = sk;
2165 out:
2166 	return cur;
2167 }
2168 
2169 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2170 {
2171 	void *rc = established_get_first(seq);
2172 
2173 	while (rc && pos) {
2174 		rc = established_get_next(seq, rc);
2175 		--pos;
2176 	}
2177 	return rc;
2178 }
2179 
2180 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2181 {
2182 	void *rc;
2183 	struct tcp_iter_state *st = seq->private;
2184 
2185 	st->state = TCP_SEQ_STATE_LISTENING;
2186 	rc	  = listening_get_idx(seq, &pos);
2187 
2188 	if (!rc) {
2189 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2190 		rc	  = established_get_idx(seq, pos);
2191 	}
2192 
2193 	return rc;
2194 }
2195 
2196 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2197 {
2198 	struct tcp_iter_state *st = seq->private;
2199 	st->state = TCP_SEQ_STATE_LISTENING;
2200 	st->num = 0;
2201 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2202 }
2203 
2204 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2205 {
2206 	void *rc = NULL;
2207 	struct tcp_iter_state *st;
2208 
2209 	if (v == SEQ_START_TOKEN) {
2210 		rc = tcp_get_idx(seq, 0);
2211 		goto out;
2212 	}
2213 	st = seq->private;
2214 
2215 	switch (st->state) {
2216 	case TCP_SEQ_STATE_OPENREQ:
2217 	case TCP_SEQ_STATE_LISTENING:
2218 		rc = listening_get_next(seq, v);
2219 		if (!rc) {
2220 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2221 			rc	  = established_get_first(seq);
2222 		}
2223 		break;
2224 	case TCP_SEQ_STATE_ESTABLISHED:
2225 	case TCP_SEQ_STATE_TIME_WAIT:
2226 		rc = established_get_next(seq, v);
2227 		break;
2228 	}
2229 out:
2230 	++*pos;
2231 	return rc;
2232 }
2233 
2234 static void tcp_seq_stop(struct seq_file *seq, void *v)
2235 {
2236 	struct tcp_iter_state *st = seq->private;
2237 
2238 	switch (st->state) {
2239 	case TCP_SEQ_STATE_OPENREQ:
2240 		if (v) {
2241 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2242 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2243 		}
2244 	case TCP_SEQ_STATE_LISTENING:
2245 		if (v != SEQ_START_TOKEN)
2246 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2247 		break;
2248 	case TCP_SEQ_STATE_TIME_WAIT:
2249 	case TCP_SEQ_STATE_ESTABLISHED:
2250 		if (v)
2251 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2252 		break;
2253 	}
2254 }
2255 
2256 static int tcp_seq_open(struct inode *inode, struct file *file)
2257 {
2258 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2259 	struct tcp_iter_state *s;
2260 	int err;
2261 
2262 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2263 			  sizeof(struct tcp_iter_state));
2264 	if (err < 0)
2265 		return err;
2266 
2267 	s = ((struct seq_file *)file->private_data)->private;
2268 	s->family		= afinfo->family;
2269 	return 0;
2270 }
2271 
2272 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2273 {
2274 	int rc = 0;
2275 	struct proc_dir_entry *p;
2276 
2277 	afinfo->seq_fops.open		= tcp_seq_open;
2278 	afinfo->seq_fops.read		= seq_read;
2279 	afinfo->seq_fops.llseek		= seq_lseek;
2280 	afinfo->seq_fops.release	= seq_release_net;
2281 
2282 	afinfo->seq_ops.start		= tcp_seq_start;
2283 	afinfo->seq_ops.next		= tcp_seq_next;
2284 	afinfo->seq_ops.stop		= tcp_seq_stop;
2285 
2286 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2287 			     &afinfo->seq_fops, afinfo);
2288 	if (!p)
2289 		rc = -ENOMEM;
2290 	return rc;
2291 }
2292 
2293 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2294 {
2295 	proc_net_remove(net, afinfo->name);
2296 }
2297 
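/* Format one SYN_RECV open request in the same column layout that
 * get_tcp4_sock() uses for full sockets; several columns are fixed
 * constants here because an open request has no inode and only the
 * expire timer running.
 */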
2298 static void get_openreq4(struct sock *sk, struct request_sock *req,
2299 			 struct seq_file *f, int i, int uid, int *len)
2300 {
2301 	const struct inet_request_sock *ireq = inet_rsk(req);
2302 	int ttd = req->expires - jiffies;
2303 
2304 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2305 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2306 		i,
2307 		ireq->loc_addr,
2308 		ntohs(inet_sk(sk)->inet_sport),
2309 		ireq->rmt_addr,
2310 		ntohs(ireq->rmt_port),
2311 		TCP_SYN_RECV,
2312 		0, 0, /* could print option size, but that is af dependent. */
2313 		1,    /* timers active (only the expire timer) */
2314 		jiffies_to_clock_t(ttd),
2315 		req->retrans,
2316 		uid,
2317 		0,  /* non standard timer */
2318 		0, /* open_requests have no inode */
2319 		atomic_read(&sk->sk_refcnt),
2320 		req,
2321 		len);
2322 }
2323 
2324 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2325 {
2326 	int timer_active;
2327 	unsigned long timer_expires;
2328 	struct tcp_sock *tp = tcp_sk(sk);
2329 	const struct inet_connection_sock *icsk = inet_csk(sk);
2330 	struct inet_sock *inet = inet_sk(sk);
2331 	__be32 dest = inet->inet_daddr;
2332 	__be32 src = inet->inet_rcv_saddr;
2333 	__u16 destp = ntohs(inet->inet_dport);
2334 	__u16 srcp = ntohs(inet->inet_sport);
2335 	int rx_queue;
2336 
2337 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2338 		timer_active	= 1;
2339 		timer_expires	= icsk->icsk_timeout;
2340 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2341 		timer_active	= 4;
2342 		timer_expires	= icsk->icsk_timeout;
2343 	} else if (timer_pending(&sk->sk_timer)) {
2344 		timer_active	= 2;
2345 		timer_expires	= sk->sk_timer.expires;
2346 	} else {
2347 		timer_active	= 0;
2348 		timer_expires = jiffies;
2349 	}
2350 
2351 	if (sk->sk_state == TCP_LISTEN)
2352 		rx_queue = sk->sk_ack_backlog;
2353 	else
2354 		/*
2355 		 * Because we don't lock the socket, we might find a transient negative value.
2356 		 */
2357 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2358 
2359 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2360 			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2361 		i, src, srcp, dest, destp, sk->sk_state,
2362 		tp->write_seq - tp->snd_una,
2363 		rx_queue,
2364 		timer_active,
2365 		jiffies_to_clock_t(timer_expires - jiffies),
2366 		icsk->icsk_retransmits,
2367 		sock_i_uid(sk),
2368 		icsk->icsk_probes_out,
2369 		sock_i_ino(sk),
2370 		atomic_read(&sk->sk_refcnt), sk,
2371 		jiffies_to_clock_t(icsk->icsk_rto),
2372 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2373 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2374 		tp->snd_cwnd,
2375 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2376 		len);
2377 }
2378 
2379 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2380 			       struct seq_file *f, int i, int *len)
2381 {
2382 	__be32 dest, src;
2383 	__u16 destp, srcp;
2384 	int ttd = tw->tw_ttd - jiffies;
2385 
2386 	if (ttd < 0)
2387 		ttd = 0;
2388 
2389 	dest  = tw->tw_daddr;
2390 	src   = tw->tw_rcv_saddr;
2391 	destp = ntohs(tw->tw_dport);
2392 	srcp  = ntohs(tw->tw_sport);
2393 
2394 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2395 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2396 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2397 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2398 		atomic_read(&tw->tw_refcnt), tw, len);
2399 }
2400 
2401 #define TMPSZ 150
2402 
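/* Each /proc/net/tcp record (and the header line) is padded with spaces to
 * TMPSZ - 1 characters plus a newline, so every line has a fixed width.
 * Addresses and ports are printed in hex; the "st" column is the socket
 * state (e.g. 0A for TCP_LISTEN).
 */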
2403 static int tcp4_seq_show(struct seq_file *seq, void *v)
2404 {
2405 	struct tcp_iter_state *st;
2406 	int len;
2407 
2408 	if (v == SEQ_START_TOKEN) {
2409 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2410 			   "  sl  local_address rem_address   st tx_queue "
2411 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2412 			   "inode");
2413 		goto out;
2414 	}
2415 	st = seq->private;
2416 
2417 	switch (st->state) {
2418 	case TCP_SEQ_STATE_LISTENING:
2419 	case TCP_SEQ_STATE_ESTABLISHED:
2420 		get_tcp4_sock(v, seq, st->num, &len);
2421 		break;
2422 	case TCP_SEQ_STATE_OPENREQ:
2423 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2424 		break;
2425 	case TCP_SEQ_STATE_TIME_WAIT:
2426 		get_timewait4_sock(v, seq, st->num, &len);
2427 		break;
2428 	}
2429 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2430 out:
2431 	return 0;
2432 }
2433 
2434 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2435 	.name		= "tcp",
2436 	.family		= AF_INET,
2437 	.seq_fops	= {
2438 		.owner		= THIS_MODULE,
2439 	},
2440 	.seq_ops	= {
2441 		.show		= tcp4_seq_show,
2442 	},
2443 };
2444 
2445 static int __net_init tcp4_proc_init_net(struct net *net)
2446 {
2447 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2448 }
2449 
2450 static void __net_exit tcp4_proc_exit_net(struct net *net)
2451 {
2452 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2453 }
2454 
2455 static struct pernet_operations tcp4_net_ops = {
2456 	.init = tcp4_proc_init_net,
2457 	.exit = tcp4_proc_exit_net,
2458 };
2459 
2460 int __init tcp4_proc_init(void)
2461 {
2462 	return register_pernet_subsys(&tcp4_net_ops);
2463 }
2464 
2465 void tcp4_proc_exit(void)
2466 {
2467 	unregister_pernet_subsys(&tcp4_net_ops);
2468 }
2469 #endif /* CONFIG_PROC_FS */
2470 
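/* GRO receive for IPv4 TCP: if the device supplied a full checksum,
 * verify it against the pseudo-header and mark the skb verified before
 * handing off to the generic tcp_gro_receive(); packets without a usable
 * checksum are flagged for a flush and not aggregated.
 */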
2471 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2472 {
2473 	struct iphdr *iph = skb_gro_network_header(skb);
2474 
2475 	switch (skb->ip_summed) {
2476 	case CHECKSUM_COMPLETE:
2477 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2478 				  skb->csum)) {
2479 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2480 			break;
2481 		}
2482 
2483 		/* fall through */
2484 	case CHECKSUM_NONE:
2485 		NAPI_GRO_CB(skb)->flush = 1;
2486 		return NULL;
2487 	}
2488 
2489 	return tcp_gro_receive(head, skb);
2490 }
2491 EXPORT_SYMBOL(tcp4_gro_receive);
2492 
2493 int tcp4_gro_complete(struct sk_buff *skb)
2494 {
2495 	struct iphdr *iph = ip_hdr(skb);
2496 	struct tcphdr *th = tcp_hdr(skb);
2497 
2498 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2499 				  iph->saddr, iph->daddr, 0);
2500 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2501 
2502 	return tcp_gro_complete(skb);
2503 }
2504 EXPORT_SYMBOL(tcp4_gro_complete);
2505 
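/* The protocol descriptor for IPv4 TCP sockets: it ties the IPv4-specific
 * handlers in this file together with the generic TCP entry points and is
 * registered with the inet socket layer (see af_inet.c) for
 * SOCK_STREAM/IPPROTO_TCP sockets.
 */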
2506 struct proto tcp_prot = {
2507 	.name			= "TCP",
2508 	.owner			= THIS_MODULE,
2509 	.close			= tcp_close,
2510 	.connect		= tcp_v4_connect,
2511 	.disconnect		= tcp_disconnect,
2512 	.accept			= inet_csk_accept,
2513 	.ioctl			= tcp_ioctl,
2514 	.init			= tcp_v4_init_sock,
2515 	.destroy		= tcp_v4_destroy_sock,
2516 	.shutdown		= tcp_shutdown,
2517 	.setsockopt		= tcp_setsockopt,
2518 	.getsockopt		= tcp_getsockopt,
2519 	.recvmsg		= tcp_recvmsg,
2520 	.backlog_rcv		= tcp_v4_do_rcv,
2521 	.hash			= inet_hash,
2522 	.unhash			= inet_unhash,
2523 	.get_port		= inet_csk_get_port,
2524 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2525 	.sockets_allocated	= &tcp_sockets_allocated,
2526 	.orphan_count		= &tcp_orphan_count,
2527 	.memory_allocated	= &tcp_memory_allocated,
2528 	.memory_pressure	= &tcp_memory_pressure,
2529 	.sysctl_mem		= sysctl_tcp_mem,
2530 	.sysctl_wmem		= sysctl_tcp_wmem,
2531 	.sysctl_rmem		= sysctl_tcp_rmem,
2532 	.max_header		= MAX_TCP_HEADER,
2533 	.obj_size		= sizeof(struct tcp_sock),
2534 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2535 	.twsk_prot		= &tcp_timewait_sock_ops,
2536 	.rsk_prot		= &tcp_request_sock_ops,
2537 	.h.hashinfo		= &tcp_hashinfo,
2538 #ifdef CONFIG_COMPAT
2539 	.compat_setsockopt	= compat_tcp_setsockopt,
2540 	.compat_getsockopt	= compat_tcp_getsockopt,
2541 #endif
2542 };
2543 
2544 
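/* Per-namespace init/exit: create and destroy the kernel control socket
 * used by the stateless reply paths earlier in this file (e.g. sending
 * RSTs and timewait ACKs), and purge any remaining timewait sockets when
 * a batch of namespaces goes away.
 */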
2545 static int __net_init tcp_sk_init(struct net *net)
2546 {
2547 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2548 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2549 }
2550 
2551 static void __net_exit tcp_sk_exit(struct net *net)
2552 {
2553 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2554 }
2555 
2556 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2557 {
2558 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2559 }
2560 
2561 static struct pernet_operations __net_initdata tcp_sk_ops = {
2562        .init	   = tcp_sk_init,
2563        .exit	   = tcp_sk_exit,
2564        .exit_batch = tcp_sk_exit_batch,
2565 };
2566 
2567 void __init tcp_v4_init(void)
2568 {
2569 	inet_hashinfo_init(&tcp_hashinfo);
2570 	if (register_pernet_subsys(&tcp_sk_ops))
2571 		panic("Failed to create the TCP control socket.\n");
2572 }
2573 
2574 EXPORT_SYMBOL(ipv4_specific);
2575 EXPORT_SYMBOL(tcp_hashinfo);
2576 EXPORT_SYMBOL(tcp_prot);
2577 EXPORT_SYMBOL(tcp_v4_conn_request);
2578 EXPORT_SYMBOL(tcp_v4_connect);
2579 EXPORT_SYMBOL(tcp_v4_do_rcv);
2580 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2581 EXPORT_SYMBOL(tcp_v4_send_check);
2582 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2583 
2584 #ifdef CONFIG_PROC_FS
2585 EXPORT_SYMBOL(tcp_proc_register);
2586 EXPORT_SYMBOL(tcp_proc_unregister);
2587 #endif
2588 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2589 
2590