1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 
54 #include <linux/bottom_half.h>
55 #include <linux/types.h>
56 #include <linux/fcntl.h>
57 #include <linux/module.h>
58 #include <linux/random.h>
59 #include <linux/cache.h>
60 #include <linux/jhash.h>
61 #include <linux/init.h>
62 #include <linux/times.h>
63 #include <linux/slab.h>
64 
65 #include <net/net_namespace.h>
66 #include <net/icmp.h>
67 #include <net/inet_hashtables.h>
68 #include <net/tcp.h>
69 #include <net/transp_v6.h>
70 #include <net/ipv6.h>
71 #include <net/inet_common.h>
72 #include <net/timewait_sock.h>
73 #include <net/xfrm.h>
74 #include <net/netdma.h>
75 
76 #include <linux/inet.h>
77 #include <linux/ipv6.h>
78 #include <linux/stddef.h>
79 #include <linux/proc_fs.h>
80 #include <linux/seq_file.h>
81 
82 #include <linux/crypto.h>
83 #include <linux/scatterlist.h>
84 
85 int sysctl_tcp_tw_reuse __read_mostly;
86 int sysctl_tcp_low_latency __read_mostly;
87 
88 
89 #ifdef CONFIG_TCP_MD5SIG
90 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
91 						   __be32 addr);
92 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
93 			       __be32 daddr, __be32 saddr, struct tcphdr *th);
94 #else
95 static inline
96 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
97 {
98 	return NULL;
99 }
100 #endif
101 
102 struct inet_hashinfo tcp_hashinfo;
103 
104 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
105 {
106 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
107 					  ip_hdr(skb)->saddr,
108 					  tcp_hdr(skb)->dest,
109 					  tcp_hdr(skb)->source);
110 }
111 
112 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
113 {
114 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
115 	struct tcp_sock *tp = tcp_sk(sk);
116 
117 	/* With PAWS, it is safe from the viewpoint
118 	   of data integrity. Even without PAWS it is safe provided sequence
119 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
120 
121 	   Actually, the idea is close to VJ's one, only the timestamp cache is
122 	   held not per host but per port pair, and the TW bucket is used as
123 	   the state holder.
124 
125 	   If the TW bucket has already been destroyed we fall back to VJ's
126 	   scheme and use the initial timestamp retrieved from the peer table.
127 	 */
128 	if (tcptw->tw_ts_recent_stamp &&
129 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
130 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
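		/* Start the new connection's sequence space well beyond anything
		 * the old TIME_WAIT connection could still have outstanding:
		 * 65535 is the largest unscaled window, plus a small margin.
		 * (Interpretation of the constant; not stated in the original.)
		 */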
131 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
132 		if (tp->write_seq == 0)
133 			tp->write_seq = 1;
134 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
135 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
136 		sock_hold(sktw);
137 		return 1;
138 	}
139 
140 	return 0;
141 }
142 
143 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
144 
145 /* This will initiate an outgoing connection. */
146 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
147 {
148 	struct inet_sock *inet = inet_sk(sk);
149 	struct tcp_sock *tp = tcp_sk(sk);
150 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
151 	struct rtable *rt;
152 	__be32 daddr, nexthop;
153 	int tmp;
154 	int err;
155 
156 	if (addr_len < sizeof(struct sockaddr_in))
157 		return -EINVAL;
158 
159 	if (usin->sin_family != AF_INET)
160 		return -EAFNOSUPPORT;
161 
162 	nexthop = daddr = usin->sin_addr.s_addr;
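	/* With a source route (IP SRR option), the packet must first be routed
	 * to the first hop recorded in the option, not to the final destination.
	 */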
163 	if (inet->opt && inet->opt->srr) {
164 		if (!daddr)
165 			return -EINVAL;
166 		nexthop = inet->opt->faddr;
167 	}
168 
169 	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
170 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
171 			       IPPROTO_TCP,
172 			       inet->inet_sport, usin->sin_port, sk, 1);
173 	if (tmp < 0) {
174 		if (tmp == -ENETUNREACH)
175 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
176 		return tmp;
177 	}
178 
179 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
180 		ip_rt_put(rt);
181 		return -ENETUNREACH;
182 	}
183 
184 	if (!inet->opt || !inet->opt->srr)
185 		daddr = rt->rt_dst;
186 
187 	if (!inet->inet_saddr)
188 		inet->inet_saddr = rt->rt_src;
189 	inet->inet_rcv_saddr = inet->inet_saddr;
190 
191 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
192 		/* Reset inherited state */
193 		tp->rx_opt.ts_recent	   = 0;
194 		tp->rx_opt.ts_recent_stamp = 0;
195 		tp->write_seq		   = 0;
196 	}
197 
198 	if (tcp_death_row.sysctl_tw_recycle &&
199 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
200 		struct inet_peer *peer = rt_get_peer(rt);
201 		/*
202 		 * VJ's idea. We save the last timestamp seen from
203 		 * the destination in the peer table when entering
204 		 * TIME-WAIT state, and initialize rx_opt.ts_recent from
205 		 * it when trying a new connection.
206 		 */
207 		if (peer != NULL &&
208 		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
209 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
210 			tp->rx_opt.ts_recent = peer->tcp_ts;
211 		}
212 	}
213 
214 	inet->inet_dport = usin->sin_port;
215 	inet->inet_daddr = daddr;
216 
217 	inet_csk(sk)->icsk_ext_hdr_len = 0;
218 	if (inet->opt)
219 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
220 
221 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
222 
223 	/* Socket identity is still unknown (sport may be zero).
224 	 * However we set the state to SYN-SENT and, without releasing the
225 	 * socket lock, select a source port, enter ourselves into the hash
226 	 * tables and complete initialization after this.
227 	 */
228 	tcp_set_state(sk, TCP_SYN_SENT);
229 	err = inet_hash_connect(&tcp_death_row, sk);
230 	if (err)
231 		goto failure;
232 
233 	err = ip_route_newports(&rt, IPPROTO_TCP,
234 				inet->inet_sport, inet->inet_dport, sk);
235 	if (err)
236 		goto failure;
237 
238 	/* OK, now commit destination to socket.  */
239 	sk->sk_gso_type = SKB_GSO_TCPV4;
240 	sk_setup_caps(sk, &rt->u.dst);
241 
242 	if (!tp->write_seq)
243 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
244 							   inet->inet_daddr,
245 							   inet->inet_sport,
246 							   usin->sin_port);
247 
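	/* Seed the IP ID counter from the initial sequence number mixed with
	 * jiffies so it does not start at a trivially predictable value.
	 */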
248 	inet->inet_id = tp->write_seq ^ jiffies;
249 
250 	err = tcp_connect(sk);
251 	rt = NULL;
252 	if (err)
253 		goto failure;
254 
255 	return 0;
256 
257 failure:
258 	/*
259 	 * This unhashes the socket and releases the local port,
260 	 * if necessary.
261 	 */
262 	tcp_set_state(sk, TCP_CLOSE);
263 	ip_rt_put(rt);
264 	sk->sk_route_caps = 0;
265 	inet->inet_dport = 0;
266 	return err;
267 }
268 
269 /*
270  * This routine does path mtu discovery as defined in RFC1191.
271  */
272 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
273 {
274 	struct dst_entry *dst;
275 	struct inet_sock *inet = inet_sk(sk);
276 
277 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
278 	 * sent out by Linux are always < 576 bytes, so they should go through
279 	 * unfragmented).
280 	 */
281 	if (sk->sk_state == TCP_LISTEN)
282 		return;
283 
284 	/* We don't check in the dst entry whether pmtu discovery is forbidden
285 	 * on this route. We just assume that no packet-too-big packets
286 	 * are sent back when pmtu discovery is not active.
287 	 * There is a small race when the user changes this flag in the
288 	 * route, but I think that's acceptable.
289 	 */
290 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
291 		return;
292 
293 	dst->ops->update_pmtu(dst, mtu);
294 
295 	/* Something is about to go wrong... Remember the soft error
296 	 * in case this connection is not able to recover.
297 	 */
298 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
299 		sk->sk_err_soft = EMSGSIZE;
300 
301 	mtu = dst_mtu(dst);
302 
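	/* Only shrink the MSS if PMTU discovery is not disabled on this socket
	 * and our cached path MTU is now larger than what the network reported.
	 */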
303 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
304 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
305 		tcp_sync_mss(sk, mtu);
306 
307 		/* Resend the TCP packet because it's
308 		 * clear that the old packet has been
309 		 * dropped. This is the new "fast" path mtu
310 		 * discovery.
311 		 */
312 		tcp_simple_retransmit(sk);
313 	} /* else let the usual retransmit timer handle it */
314 }
315 
316 /*
317  * This routine is called by the ICMP module when it gets some
318  * sort of error condition.  If err < 0 then the socket should
319  * be closed and the error returned to the user.  If err > 0
320  * it's just the icmp type << 8 | icmp code.  After adjustment,
321  * the header points to the first 8 bytes of the TCP header.  We need
322  * to find the appropriate port.
323  *
324  * The locking strategy used here is very "optimistic". When
325  * someone else accesses the socket, the ICMP is just dropped,
326  * and for some paths there is no check at all.
327  * A more general error queue that queues errors for later handling
328  * would probably be better.
329  *
330  */
331 
332 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
333 {
334 	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
335 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
336 	struct inet_connection_sock *icsk;
337 	struct tcp_sock *tp;
338 	struct inet_sock *inet;
339 	const int type = icmp_hdr(icmp_skb)->type;
340 	const int code = icmp_hdr(icmp_skb)->code;
341 	struct sock *sk;
342 	struct sk_buff *skb;
343 	__u32 seq;
344 	__u32 remaining;
345 	int err;
346 	struct net *net = dev_net(icmp_skb->dev);
347 
348 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
349 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
350 		return;
351 	}
352 
353 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
354 			iph->saddr, th->source, inet_iif(icmp_skb));
355 	if (!sk) {
356 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
357 		return;
358 	}
359 	if (sk->sk_state == TCP_TIME_WAIT) {
360 		inet_twsk_put(inet_twsk(sk));
361 		return;
362 	}
363 
364 	bh_lock_sock(sk);
365 	/* If too many ICMPs get dropped on busy
366 	 * servers this needs to be solved differently.
367 	 */
368 	if (sock_owned_by_user(sk))
369 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
370 
371 	if (sk->sk_state == TCP_CLOSE)
372 		goto out;
373 
374 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
375 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
376 		goto out;
377 	}
378 
379 	icsk = inet_csk(sk);
380 	tp = tcp_sk(sk);
381 	seq = ntohl(th->seq);
382 	if (sk->sk_state != TCP_LISTEN &&
383 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
384 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
385 		goto out;
386 	}
387 
388 	switch (type) {
389 	case ICMP_SOURCE_QUENCH:
390 		/* Just silently ignore these. */
391 		goto out;
392 	case ICMP_PARAMETERPROB:
393 		err = EPROTO;
394 		break;
395 	case ICMP_DEST_UNREACH:
396 		if (code > NR_ICMP_UNREACH)
397 			goto out;
398 
399 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
400 			if (!sock_owned_by_user(sk))
401 				do_pmtu_discovery(sk, iph, info);
402 			goto out;
403 		}
404 
405 		err = icmp_err_convert[code].errno;
406 		/* check if icmp_skb allows revert of backoff
407 		 * (see draft-zimmermann-tcp-lcd) */
408 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
409 			break;
410 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
411 		    !icsk->icsk_backoff)
412 			break;
413 
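		/* The ICMP error shows the path is reporting rather than silently
		 * dropping, so undo one step of exponential backoff and rearm the
		 * retransmit timer with whatever time is left (RTO revert, see
		 * draft-zimmermann-tcp-lcd above).
		 */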
414 		icsk->icsk_backoff--;
415 		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
416 					 icsk->icsk_backoff;
417 		tcp_bound_rto(sk);
418 
419 		skb = tcp_write_queue_head(sk);
420 		BUG_ON(!skb);
421 
422 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
423 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
424 
425 		if (remaining) {
426 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
427 						  remaining, TCP_RTO_MAX);
428 		} else if (sock_owned_by_user(sk)) {
429 			/* RTO revert clocked out retransmission,
430 			 * but socket is locked. Will defer. */
431 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
432 						  HZ/20, TCP_RTO_MAX);
433 		} else {
434 			/* RTO revert clocked out retransmission.
435 			 * Will retransmit now */
436 			tcp_retransmit_timer(sk);
437 		}
438 
439 		break;
440 	case ICMP_TIME_EXCEEDED:
441 		err = EHOSTUNREACH;
442 		break;
443 	default:
444 		goto out;
445 	}
446 
447 	switch (sk->sk_state) {
448 		struct request_sock *req, **prev;
449 	case TCP_LISTEN:
450 		if (sock_owned_by_user(sk))
451 			goto out;
452 
453 		req = inet_csk_search_req(sk, &prev, th->dest,
454 					  iph->daddr, iph->saddr);
455 		if (!req)
456 			goto out;
457 
458 		/* ICMPs are not backlogged, hence we cannot get
459 		   an established socket here.
460 		 */
461 		WARN_ON(req->sk);
462 
463 		if (seq != tcp_rsk(req)->snt_isn) {
464 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
465 			goto out;
466 		}
467 
468 		/*
469 		 * Still in SYN_RECV, just remove it silently.
470 		 * There is no good way to pass the error to the newly
471 		 * created socket, and POSIX does not want network
472 		 * errors returned from accept().
473 		 */
474 		inet_csk_reqsk_queue_drop(sk, req, prev);
475 		goto out;
476 
477 	case TCP_SYN_SENT:
478 	case TCP_SYN_RECV:  /* Normally cannot happen here.
479 			       It can, e.g., if SYNs crossed.
480 			     */
481 		if (!sock_owned_by_user(sk)) {
482 			sk->sk_err = err;
483 
484 			sk->sk_error_report(sk);
485 
486 			tcp_done(sk);
487 		} else {
488 			sk->sk_err_soft = err;
489 		}
490 		goto out;
491 	}
492 
493 	/* If we've already connected we will keep trying
494 	 * until we time out, or the user gives up.
495 	 *
496 	 * RFC 1122 4.2.3.9 allows us to treat as hard errors
497 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
498 	 * but it is obsoleted by pmtu discovery).
499 	 *
500 	 * Note that in the modern internet, where routing is unreliable and
501 	 * broken firewalls sit in every dark corner sending random errors
502 	 * as ordered by their masters, even these two messages have finally
503 	 * lost their original sense (even Linux sends invalid PORT_UNREACHs).
504 	 *
505 	 * Now we are in compliance with RFCs.
506 	 *							--ANK (980905)
507 	 */
508 
509 	inet = inet_sk(sk);
510 	if (!sock_owned_by_user(sk) && inet->recverr) {
511 		sk->sk_err = err;
512 		sk->sk_error_report(sk);
513 	} else	{ /* Only an error on timeout */
514 		sk->sk_err_soft = err;
515 	}
516 
517 out:
518 	bh_unlock_sock(sk);
519 	sock_put(sk);
520 }
521 
522 static void __tcp_v4_send_check(struct sk_buff *skb,
523 				__be32 saddr, __be32 daddr)
524 {
525 	struct tcphdr *th = tcp_hdr(skb);
526 
527 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
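		/* Hardware (or a later step) finishes the checksum: store only the
		 * pseudo-header sum and record where the checksum field lives.
		 */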
528 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
529 		skb->csum_start = skb_transport_header(skb) - skb->head;
530 		skb->csum_offset = offsetof(struct tcphdr, check);
531 	} else {
532 		th->check = tcp_v4_check(skb->len, saddr, daddr,
533 					 csum_partial(th,
534 						      th->doff << 2,
535 						      skb->csum));
536 	}
537 }
538 
539 /* This routine computes an IPv4 TCP checksum. */
540 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
541 {
542 	struct inet_sock *inet = inet_sk(sk);
543 
544 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
545 }
546 
547 int tcp_v4_gso_send_check(struct sk_buff *skb)
548 {
549 	const struct iphdr *iph;
550 	struct tcphdr *th;
551 
552 	if (!pskb_may_pull(skb, sizeof(*th)))
553 		return -EINVAL;
554 
555 	iph = ip_hdr(skb);
556 	th = tcp_hdr(skb);
557 
558 	th->check = 0;
559 	skb->ip_summed = CHECKSUM_PARTIAL;
560 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
561 	return 0;
562 }
563 
564 /*
565  *	This routine will send an RST to the other tcp.
566  *
567  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL, etc.)
568  *		      for the reset?
569  *	Answer: if a packet caused an RST, it is not for a socket
570  *		existing in our system; if it is matched to a socket,
571  *		it is just a duplicate segment or a bug in the other side's TCP.
572  *		So we build the reply based only on the parameters
573  *		that arrived with the segment.
574  *	Exception: precedence violation. We do not implement it in any case.
575  */
576 
577 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
578 {
579 	struct tcphdr *th = tcp_hdr(skb);
580 	struct {
581 		struct tcphdr th;
582 #ifdef CONFIG_TCP_MD5SIG
583 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
584 #endif
585 	} rep;
586 	struct ip_reply_arg arg;
587 #ifdef CONFIG_TCP_MD5SIG
588 	struct tcp_md5sig_key *key;
589 #endif
590 	struct net *net;
591 
592 	/* Never send a reset in response to a reset. */
593 	if (th->rst)
594 		return;
595 
596 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
597 		return;
598 
599 	/* Swap the send and the receive. */
600 	memset(&rep, 0, sizeof(rep));
601 	rep.th.dest   = th->source;
602 	rep.th.source = th->dest;
603 	rep.th.doff   = sizeof(struct tcphdr) / 4;
604 	rep.th.rst    = 1;
605 
606 	if (th->ack) {
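	/* Per RFC 793 reset generation: if the offending segment carried an ACK,
	 * the RST uses that ACK value as its sequence number; otherwise we ACK
	 * everything the segment consumed (SYN and FIN each count as one).
	 */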
607 		rep.th.seq = th->ack_seq;
608 	} else {
609 		rep.th.ack = 1;
610 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
611 				       skb->len - (th->doff << 2));
612 	}
613 
614 	memset(&arg, 0, sizeof(arg));
615 	arg.iov[0].iov_base = (unsigned char *)&rep;
616 	arg.iov[0].iov_len  = sizeof(rep.th);
617 
618 #ifdef CONFIG_TCP_MD5SIG
619 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
620 	if (key) {
621 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
622 				   (TCPOPT_NOP << 16) |
623 				   (TCPOPT_MD5SIG << 8) |
624 				   TCPOLEN_MD5SIG);
625 		/* Update length and the length the header thinks exists */
626 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
627 		rep.th.doff = arg.iov[0].iov_len / 4;
628 
629 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
630 				     key, ip_hdr(skb)->saddr,
631 				     ip_hdr(skb)->daddr, &rep.th);
632 	}
633 #endif
634 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
635 				      ip_hdr(skb)->saddr, /* XXX */
636 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
637 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
638 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
639 
640 	net = dev_net(skb_dst(skb)->dev);
641 	ip_send_reply(net->ipv4.tcp_sock, skb,
642 		      &arg, arg.iov[0].iov_len);
643 
644 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
645 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
646 }
647 
648 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
649    outside of socket context, is certainly ugly. What can I do?
650  */
651 
652 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
653 			    u32 win, u32 ts, int oif,
654 			    struct tcp_md5sig_key *key,
655 			    int reply_flags)
656 {
657 	struct tcphdr *th = tcp_hdr(skb);
658 	struct {
659 		struct tcphdr th;
660 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
661 #ifdef CONFIG_TCP_MD5SIG
662 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
663 #endif
664 			];
665 	} rep;
666 	struct ip_reply_arg arg;
667 	struct net *net = dev_net(skb_dst(skb)->dev);
668 
669 	memset(&rep.th, 0, sizeof(struct tcphdr));
670 	memset(&arg, 0, sizeof(arg));
671 
672 	arg.iov[0].iov_base = (unsigned char *)&rep;
673 	arg.iov[0].iov_len  = sizeof(rep.th);
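	/* When echoing a timestamp, pad with two NOPs for 4-byte alignment and
	 * emit a TIMESTAMP option: our clock value plus the value to echo.
	 */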
674 	if (ts) {
675 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
676 				   (TCPOPT_TIMESTAMP << 8) |
677 				   TCPOLEN_TIMESTAMP);
678 		rep.opt[1] = htonl(tcp_time_stamp);
679 		rep.opt[2] = htonl(ts);
680 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
681 	}
682 
683 	/* Swap the send and the receive. */
684 	rep.th.dest    = th->source;
685 	rep.th.source  = th->dest;
686 	rep.th.doff    = arg.iov[0].iov_len / 4;
687 	rep.th.seq     = htonl(seq);
688 	rep.th.ack_seq = htonl(ack);
689 	rep.th.ack     = 1;
690 	rep.th.window  = htons(win);
691 
692 #ifdef CONFIG_TCP_MD5SIG
693 	if (key) {
694 		int offset = (ts) ? 3 : 0;
695 
696 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
697 					  (TCPOPT_NOP << 16) |
698 					  (TCPOPT_MD5SIG << 8) |
699 					  TCPOLEN_MD5SIG);
700 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
701 		rep.th.doff = arg.iov[0].iov_len/4;
702 
703 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
704 				    key, ip_hdr(skb)->saddr,
705 				    ip_hdr(skb)->daddr, &rep.th);
706 	}
707 #endif
708 	arg.flags = reply_flags;
709 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
710 				      ip_hdr(skb)->saddr, /* XXX */
711 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
712 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
713 	if (oif)
714 		arg.bound_dev_if = oif;
715 
716 	ip_send_reply(net->ipv4.tcp_sock, skb,
717 		      &arg, arg.iov[0].iov_len);
718 
719 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
720 }
721 
722 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
723 {
724 	struct inet_timewait_sock *tw = inet_twsk(sk);
725 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
726 
727 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
728 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
729 			tcptw->tw_ts_recent,
730 			tw->tw_bound_dev_if,
731 			tcp_twsk_md5_key(tcptw),
732 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
733 			);
734 
735 	inet_twsk_put(tw);
736 }
737 
738 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
739 				  struct request_sock *req)
740 {
741 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
742 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
743 			req->ts_recent,
744 			0,
745 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
746 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
747 }
748 
749 /*
750  *	Send a SYN-ACK after having received a SYN.
751  *	This still operates on a request_sock only, not on a big
752  *	socket.
753  */
754 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
755 			      struct request_sock *req,
756 			      struct request_values *rvp)
757 {
758 	const struct inet_request_sock *ireq = inet_rsk(req);
759 	int err = -1;
760 	struct sk_buff * skb;
761 
762 	/* First, grab a route. */
763 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
764 		return -1;
765 
766 	skb = tcp_make_synack(sk, dst, req, rvp);
767 
768 	if (skb) {
769 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
770 
771 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
772 					    ireq->rmt_addr,
773 					    ireq->opt);
774 		err = net_xmit_eval(err);
775 	}
776 
777 	dst_release(dst);
778 	return err;
779 }
780 
781 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
782 			      struct request_values *rvp)
783 {
784 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
785 	return tcp_v4_send_synack(sk, NULL, req, rvp);
786 }
787 
788 /*
789  *	IPv4 request_sock destructor.
790  */
791 static void tcp_v4_reqsk_destructor(struct request_sock *req)
792 {
793 	kfree(inet_rsk(req)->opt);
794 }
795 
796 #ifdef CONFIG_SYN_COOKIES
797 static void syn_flood_warning(struct sk_buff *skb)
798 {
799 	static unsigned long warntime;
800 
801 	if (time_after(jiffies, (warntime + HZ * 60))) {
802 		warntime = jiffies;
803 		printk(KERN_INFO
804 		       "possible SYN flooding on port %d. Sending cookies.\n",
805 		       ntohs(tcp_hdr(skb)->dest));
806 	}
807 }
808 #endif
809 
810 /*
811  * Save and compile IPv4 options into the request_sock if needed.
812  */
813 static struct ip_options *tcp_v4_save_options(struct sock *sk,
814 					      struct sk_buff *skb)
815 {
816 	struct ip_options *opt = &(IPCB(skb)->opt);
817 	struct ip_options *dopt = NULL;
818 
819 	if (opt && opt->optlen) {
820 		int opt_size = optlength(opt);
821 		dopt = kmalloc(opt_size, GFP_ATOMIC);
822 		if (dopt) {
823 			if (ip_options_echo(dopt, skb)) {
824 				kfree(dopt);
825 				dopt = NULL;
826 			}
827 		}
828 	}
829 	return dopt;
830 }
831 
832 #ifdef CONFIG_TCP_MD5SIG
833 /*
834  * RFC2385 MD5 checksumming requires a mapping of
835  * IP address->MD5 Key.
836  * We need to maintain these in the sk structure.
837  */
838 
839 /* Find the Key structure for an address.  */
840 static struct tcp_md5sig_key *
841 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
842 {
843 	struct tcp_sock *tp = tcp_sk(sk);
844 	int i;
845 
846 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
847 		return NULL;
848 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
849 		if (tp->md5sig_info->keys4[i].addr == addr)
850 			return &tp->md5sig_info->keys4[i].base;
851 	}
852 	return NULL;
853 }
854 
855 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
856 					 struct sock *addr_sk)
857 {
858 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
859 }
860 
861 EXPORT_SYMBOL(tcp_v4_md5_lookup);
862 
863 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
864 						      struct request_sock *req)
865 {
866 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
867 }
868 
869 /* This can be called on a newly created socket, from other files */
870 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
871 		      u8 *newkey, u8 newkeylen)
872 {
873 	/* Add Key to the list */
874 	struct tcp_md5sig_key *key;
875 	struct tcp_sock *tp = tcp_sk(sk);
876 	struct tcp4_md5sig_key *keys;
877 
878 	key = tcp_v4_md5_do_lookup(sk, addr);
879 	if (key) {
880 		/* Pre-existing entry - just update that one. */
881 		kfree(key->key);
882 		key->key = newkey;
883 		key->keylen = newkeylen;
884 	} else {
885 		struct tcp_md5sig_info *md5sig;
886 
887 		if (!tp->md5sig_info) {
888 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
889 						  GFP_ATOMIC);
890 			if (!tp->md5sig_info) {
891 				kfree(newkey);
892 				return -ENOMEM;
893 			}
894 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
895 		}
896 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
897 			kfree(newkey);
898 			return -ENOMEM;
899 		}
900 		md5sig = tp->md5sig_info;
901 
902 		if (md5sig->alloced4 == md5sig->entries4) {
903 			keys = kmalloc((sizeof(*keys) *
904 					(md5sig->entries4 + 1)), GFP_ATOMIC);
905 			if (!keys) {
906 				kfree(newkey);
907 				tcp_free_md5sig_pool();
908 				return -ENOMEM;
909 			}
910 
911 			if (md5sig->entries4)
912 				memcpy(keys, md5sig->keys4,
913 				       sizeof(*keys) * md5sig->entries4);
914 
915 			/* Free old key list, and reference new one */
916 			kfree(md5sig->keys4);
917 			md5sig->keys4 = keys;
918 			md5sig->alloced4++;
919 		}
920 		md5sig->entries4++;
921 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
922 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
923 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
924 	}
925 	return 0;
926 }
927 
928 EXPORT_SYMBOL(tcp_v4_md5_do_add);
929 
930 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
931 			       u8 *newkey, u8 newkeylen)
932 {
933 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
934 				 newkey, newkeylen);
935 }
936 
937 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
938 {
939 	struct tcp_sock *tp = tcp_sk(sk);
940 	int i;
941 
942 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
943 		if (tp->md5sig_info->keys4[i].addr == addr) {
944 			/* Free the key */
945 			kfree(tp->md5sig_info->keys4[i].base.key);
946 			tp->md5sig_info->entries4--;
947 
948 			if (tp->md5sig_info->entries4 == 0) {
949 				kfree(tp->md5sig_info->keys4);
950 				tp->md5sig_info->keys4 = NULL;
951 				tp->md5sig_info->alloced4 = 0;
952 			} else if (tp->md5sig_info->entries4 != i) {
953 				/* Shift the remaining entries down to close the gap */
954 				memmove(&tp->md5sig_info->keys4[i],
955 					&tp->md5sig_info->keys4[i+1],
956 					(tp->md5sig_info->entries4 - i) *
957 					 sizeof(struct tcp4_md5sig_key));
958 			}
959 			tcp_free_md5sig_pool();
960 			return 0;
961 		}
962 	}
963 	return -ENOENT;
964 }
965 
966 EXPORT_SYMBOL(tcp_v4_md5_do_del);
967 
968 static void tcp_v4_clear_md5_list(struct sock *sk)
969 {
970 	struct tcp_sock *tp = tcp_sk(sk);
971 
972 	/* Free each key, then the set of keys,
973 	 * the crypto element, and then decrement our
974 	 * hold on the last resort crypto.
975 	 */
976 	if (tp->md5sig_info->entries4) {
977 		int i;
978 		for (i = 0; i < tp->md5sig_info->entries4; i++)
979 			kfree(tp->md5sig_info->keys4[i].base.key);
980 		tp->md5sig_info->entries4 = 0;
981 		tcp_free_md5sig_pool();
982 	}
983 	if (tp->md5sig_info->keys4) {
984 		kfree(tp->md5sig_info->keys4);
985 		tp->md5sig_info->keys4 = NULL;
986 		tp->md5sig_info->alloced4  = 0;
987 	}
988 }
989 
990 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
991 				 int optlen)
992 {
993 	struct tcp_md5sig cmd;
994 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
995 	u8 *newkey;
996 
997 	if (optlen < sizeof(cmd))
998 		return -EINVAL;
999 
1000 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1001 		return -EFAULT;
1002 
1003 	if (sin->sin_family != AF_INET)
1004 		return -EINVAL;
1005 
1006 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1007 		if (!tcp_sk(sk)->md5sig_info)
1008 			return -ENOENT;
1009 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1010 	}
1011 
1012 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1013 		return -EINVAL;
1014 
1015 	if (!tcp_sk(sk)->md5sig_info) {
1016 		struct tcp_sock *tp = tcp_sk(sk);
1017 		struct tcp_md5sig_info *p;
1018 
1019 		p = kzalloc(sizeof(*p), sk->sk_allocation);
1020 		if (!p)
1021 			return -EINVAL;
1022 
1023 		tp->md5sig_info = p;
1024 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1025 	}
1026 
1027 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1028 	if (!newkey)
1029 		return -ENOMEM;
1030 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1031 				 newkey, cmd.tcpm_keylen);
1032 }
1033 
1034 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1035 					__be32 daddr, __be32 saddr, int nbytes)
1036 {
1037 	struct tcp4_pseudohdr *bp;
1038 	struct scatterlist sg;
1039 
1040 	bp = &hp->md5_blk.ip4;
1041 
1042 	/*
1043 	 * 1. the TCP pseudo-header (in the order: source IP address,
1044 	 * destination IP address, zero-padded protocol number, and
1045 	 * segment length)
1046 	 */
1047 	bp->saddr = saddr;
1048 	bp->daddr = daddr;
1049 	bp->pad = 0;
1050 	bp->protocol = IPPROTO_TCP;
1051 	bp->len = cpu_to_be16(nbytes);
1052 
1053 	sg_init_one(&sg, bp, sizeof(*bp));
1054 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1055 }
1056 
1057 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1058 			       __be32 daddr, __be32 saddr, struct tcphdr *th)
1059 {
1060 	struct tcp_md5sig_pool *hp;
1061 	struct hash_desc *desc;
1062 
1063 	hp = tcp_get_md5sig_pool();
1064 	if (!hp)
1065 		goto clear_hash_noput;
1066 	desc = &hp->md5_desc;
1067 
1068 	if (crypto_hash_init(desc))
1069 		goto clear_hash;
1070 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1071 		goto clear_hash;
1072 	if (tcp_md5_hash_header(hp, th))
1073 		goto clear_hash;
1074 	if (tcp_md5_hash_key(hp, key))
1075 		goto clear_hash;
1076 	if (crypto_hash_final(desc, md5_hash))
1077 		goto clear_hash;
1078 
1079 	tcp_put_md5sig_pool();
1080 	return 0;
1081 
1082 clear_hash:
1083 	tcp_put_md5sig_pool();
1084 clear_hash_noput:
1085 	memset(md5_hash, 0, 16);
1086 	return 1;
1087 }
1088 
1089 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1090 			struct sock *sk, struct request_sock *req,
1091 			struct sk_buff *skb)
1092 {
1093 	struct tcp_md5sig_pool *hp;
1094 	struct hash_desc *desc;
1095 	struct tcphdr *th = tcp_hdr(skb);
1096 	__be32 saddr, daddr;
1097 
1098 	if (sk) {
1099 		saddr = inet_sk(sk)->inet_saddr;
1100 		daddr = inet_sk(sk)->inet_daddr;
1101 	} else if (req) {
1102 		saddr = inet_rsk(req)->loc_addr;
1103 		daddr = inet_rsk(req)->rmt_addr;
1104 	} else {
1105 		const struct iphdr *iph = ip_hdr(skb);
1106 		saddr = iph->saddr;
1107 		daddr = iph->daddr;
1108 	}
1109 
1110 	hp = tcp_get_md5sig_pool();
1111 	if (!hp)
1112 		goto clear_hash_noput;
1113 	desc = &hp->md5_desc;
1114 
1115 	if (crypto_hash_init(desc))
1116 		goto clear_hash;
1117 
1118 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1119 		goto clear_hash;
1120 	if (tcp_md5_hash_header(hp, th))
1121 		goto clear_hash;
1122 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1123 		goto clear_hash;
1124 	if (tcp_md5_hash_key(hp, key))
1125 		goto clear_hash;
1126 	if (crypto_hash_final(desc, md5_hash))
1127 		goto clear_hash;
1128 
1129 	tcp_put_md5sig_pool();
1130 	return 0;
1131 
1132 clear_hash:
1133 	tcp_put_md5sig_pool();
1134 clear_hash_noput:
1135 	memset(md5_hash, 0, 16);
1136 	return 1;
1137 }
1138 
1139 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1140 
1141 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1142 {
1143 	/*
1144 	 * This gets called for each TCP segment that arrives
1145 	 * so we want to be efficient.
1146 	 * We have 3 drop cases:
1147 	 * o No MD5 hash and one expected.
1148 	 * o MD5 hash and we're not expecting one.
1149 	 * o MD5 hash and it's wrong.
1150 	 */
1151 	__u8 *hash_location = NULL;
1152 	struct tcp_md5sig_key *hash_expected;
1153 	const struct iphdr *iph = ip_hdr(skb);
1154 	struct tcphdr *th = tcp_hdr(skb);
1155 	int genhash;
1156 	unsigned char newhash[16];
1157 
1158 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1159 	hash_location = tcp_parse_md5sig_option(th);
1160 
1161 	/* We've parsed the options - do we have a hash? */
1162 	if (!hash_expected && !hash_location)
1163 		return 0;
1164 
1165 	if (hash_expected && !hash_location) {
1166 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1167 		return 1;
1168 	}
1169 
1170 	if (!hash_expected && hash_location) {
1171 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1172 		return 1;
1173 	}
1174 
1175 	/* Okay, so we have both hash_expected and hash_location -
1176 	 * we need to calculate the MD5 hash and compare.
1177 	 */
1178 	genhash = tcp_v4_md5_hash_skb(newhash,
1179 				      hash_expected,
1180 				      NULL, NULL, skb);
1181 
1182 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1183 		if (net_ratelimit()) {
1184 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1185 			       &iph->saddr, ntohs(th->source),
1186 			       &iph->daddr, ntohs(th->dest),
1187 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1188 		}
1189 		return 1;
1190 	}
1191 	return 0;
1192 }
1193 
1194 #endif
1195 
1196 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1197 	.family		=	PF_INET,
1198 	.obj_size	=	sizeof(struct tcp_request_sock),
1199 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1200 	.send_ack	=	tcp_v4_reqsk_send_ack,
1201 	.destructor	=	tcp_v4_reqsk_destructor,
1202 	.send_reset	=	tcp_v4_send_reset,
1203 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1204 };
1205 
1206 #ifdef CONFIG_TCP_MD5SIG
1207 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1208 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1209 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1210 };
1211 #endif
1212 
1213 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1214 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1215 	.twsk_unique	= tcp_twsk_unique,
1216 	.twsk_destructor= tcp_twsk_destructor,
1217 };
1218 
1219 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1220 {
1221 	struct tcp_extend_values tmp_ext;
1222 	struct tcp_options_received tmp_opt;
1223 	u8 *hash_location;
1224 	struct request_sock *req;
1225 	struct inet_request_sock *ireq;
1226 	struct tcp_sock *tp = tcp_sk(sk);
1227 	struct dst_entry *dst = NULL;
1228 	__be32 saddr = ip_hdr(skb)->saddr;
1229 	__be32 daddr = ip_hdr(skb)->daddr;
1230 	__u32 isn = TCP_SKB_CB(skb)->when;
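	/* TCP_SKB_CB(skb)->when is reused here to carry a pre-chosen ISN when
	 * the SYN revived a TIME_WAIT bucket; zero means "pick one below".
	 */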
1231 #ifdef CONFIG_SYN_COOKIES
1232 	int want_cookie = 0;
1233 #else
1234 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1235 #endif
1236 
1237 	/* Never answer SYNs sent to broadcast or multicast addresses */
1238 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1239 		goto drop;
1240 
1241 	/* TW buckets are converted to open requests without
1242 	 * limitation; they conserve resources and the peer is
1243 	 * evidently a real one.
1244 	 */
1245 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1246 #ifdef CONFIG_SYN_COOKIES
1247 		if (sysctl_tcp_syncookies) {
1248 			want_cookie = 1;
1249 		} else
1250 #endif
1251 		goto drop;
1252 	}
1253 
1254 	/* Accept backlog is full. If we have already queued enough
1255 	 * warm entries in the syn queue, drop the request. It is better than
1256 	 * clogging the syn queue with openreqs that have an exponentially
1257 	 * increasing timeout.
1258 	 */
1259 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1260 		goto drop;
1261 
1262 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1263 	if (!req)
1264 		goto drop;
1265 
1266 #ifdef CONFIG_TCP_MD5SIG
1267 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1268 #endif
1269 
1270 	tcp_clear_options(&tmp_opt);
1271 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1272 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1273 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1274 
1275 	if (tmp_opt.cookie_plus > 0 &&
1276 	    tmp_opt.saw_tstamp &&
1277 	    !tp->rx_opt.cookie_out_never &&
1278 	    (sysctl_tcp_cookie_size > 0 ||
1279 	     (tp->cookie_values != NULL &&
1280 	      tp->cookie_values->cookie_desired > 0))) {
1281 		u8 *c;
1282 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1283 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1284 
1285 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1286 			goto drop_and_release;
1287 
1288 		/* Secret recipe starts with IP addresses */
1289 		*mess++ ^= (__force u32)daddr;
1290 		*mess++ ^= (__force u32)saddr;
1291 
1292 		/* plus variable length Initiator Cookie */
1293 		c = (u8 *)mess;
1294 		while (l-- > 0)
1295 			*c++ ^= *hash_location++;
1296 
1297 #ifdef CONFIG_SYN_COOKIES
1298 		want_cookie = 0;	/* not our kind of cookie */
1299 #endif
1300 		tmp_ext.cookie_out_never = 0; /* false */
1301 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1302 	} else if (!tp->rx_opt.cookie_in_always) {
1303 		/* redundant indications, but ensure initialization. */
1304 		tmp_ext.cookie_out_never = 1; /* true */
1305 		tmp_ext.cookie_plus = 0;
1306 	} else {
1307 		goto drop_and_release;
1308 	}
1309 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1310 
1311 	if (want_cookie && !tmp_opt.saw_tstamp)
1312 		tcp_clear_options(&tmp_opt);
1313 
1314 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1315 	tcp_openreq_init(req, &tmp_opt, skb);
1316 
1317 	ireq = inet_rsk(req);
1318 	ireq->loc_addr = daddr;
1319 	ireq->rmt_addr = saddr;
1320 	ireq->no_srccheck = inet_sk(sk)->transparent;
1321 	ireq->opt = tcp_v4_save_options(sk, skb);
1322 
1323 	if (security_inet_conn_request(sk, skb, req))
1324 		goto drop_and_free;
1325 
1326 	if (!want_cookie)
1327 		TCP_ECN_create_request(req, tcp_hdr(skb));
1328 
1329 	if (want_cookie) {
1330 #ifdef CONFIG_SYN_COOKIES
1331 		syn_flood_warning(skb);
1332 		req->cookie_ts = tmp_opt.tstamp_ok;
1333 #endif
1334 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1335 	} else if (!isn) {
1336 		struct inet_peer *peer = NULL;
1337 
1338 		/* VJ's idea. We save the last timestamp seen
1339 		 * from the destination in the peer table when entering
1340 		 * TIME-WAIT state, and check against it before
1341 		 * accepting a new connection request.
1342 		 *
1343 		 * If "isn" is not zero, this request hit a live
1344 		 * timewait bucket, so all the necessary checks
1345 		 * were already made by the code processing the timewait state.
1346 		 */
1347 		if (tmp_opt.saw_tstamp &&
1348 		    tcp_death_row.sysctl_tw_recycle &&
1349 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1350 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1351 		    peer->v4daddr == saddr) {
1352 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1353 			    (s32)(peer->tcp_ts - req->ts_recent) >
1354 							TCP_PAWS_WINDOW) {
1355 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1356 				goto drop_and_release;
1357 			}
1358 		}
1359 		/* Kill the following clause, if you dislike this way. */
1360 		else if (!sysctl_tcp_syncookies &&
1361 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1362 			  (sysctl_max_syn_backlog >> 2)) &&
1363 			 (!peer || !peer->tcp_ts_stamp) &&
1364 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1365 			/* Without syncookies, the last quarter of the
1366 			 * backlog is reserved for destinations proven
1367 			 * to be alive.
1368 			 * It means that we continue to communicate only
1369 			 * with destinations already remembered at the
1370 			 * moment the synflood started.
1371 			 */
1372 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1373 				       &saddr, ntohs(tcp_hdr(skb)->source));
1374 			goto drop_and_release;
1375 		}
1376 
1377 		isn = tcp_v4_init_sequence(skb);
1378 	}
1379 	tcp_rsk(req)->snt_isn = isn;
1380 
1381 	if (tcp_v4_send_synack(sk, dst, req,
1382 			       (struct request_values *)&tmp_ext) ||
1383 	    want_cookie)
1384 		goto drop_and_free;
1385 
1386 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1387 	return 0;
1388 
1389 drop_and_release:
1390 	dst_release(dst);
1391 drop_and_free:
1392 	reqsk_free(req);
1393 drop:
1394 	return 0;
1395 }
1396 
1397 
1398 /*
1399  * The three way handshake has completed - we got a valid synack -
1400  * now create the new socket.
1401  */
1402 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1403 				  struct request_sock *req,
1404 				  struct dst_entry *dst)
1405 {
1406 	struct inet_request_sock *ireq;
1407 	struct inet_sock *newinet;
1408 	struct tcp_sock *newtp;
1409 	struct sock *newsk;
1410 #ifdef CONFIG_TCP_MD5SIG
1411 	struct tcp_md5sig_key *key;
1412 #endif
1413 
1414 	if (sk_acceptq_is_full(sk))
1415 		goto exit_overflow;
1416 
1417 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1418 		goto exit;
1419 
1420 	newsk = tcp_create_openreq_child(sk, req, skb);
1421 	if (!newsk)
1422 		goto exit;
1423 
1424 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1425 	sk_setup_caps(newsk, dst);
1426 
1427 	newtp		      = tcp_sk(newsk);
1428 	newinet		      = inet_sk(newsk);
1429 	ireq		      = inet_rsk(req);
1430 	newinet->inet_daddr   = ireq->rmt_addr;
1431 	newinet->inet_rcv_saddr = ireq->loc_addr;
1432 	newinet->inet_saddr	      = ireq->loc_addr;
1433 	newinet->opt	      = ireq->opt;
1434 	ireq->opt	      = NULL;
1435 	newinet->mc_index     = inet_iif(skb);
1436 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1437 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1438 	if (newinet->opt)
1439 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1440 	newinet->inet_id = newtp->write_seq ^ jiffies;
1441 
1442 	tcp_mtup_init(newsk);
1443 	tcp_sync_mss(newsk, dst_mtu(dst));
1444 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1445 	if (tcp_sk(sk)->rx_opt.user_mss &&
1446 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1447 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1448 
1449 	tcp_initialize_rcv_mss(newsk);
1450 
1451 #ifdef CONFIG_TCP_MD5SIG
1452 	/* Copy over the MD5 key from the original socket */
1453 	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1454 	if (key != NULL) {
1455 		/*
1456 		 * We're using one, so create a matching key
1457 		 * on the newsk structure. If we fail to get
1458 		 * memory, then we end up not copying the key
1459 		 * across. Shucks.
1460 		 */
1461 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1462 		if (newkey != NULL)
1463 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1464 					  newkey, key->keylen);
1465 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1466 	}
1467 #endif
1468 
1469 	__inet_hash_nolisten(newsk, NULL);
1470 	__inet_inherit_port(sk, newsk);
1471 
1472 	return newsk;
1473 
1474 exit_overflow:
1475 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1476 exit:
1477 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1478 	dst_release(dst);
1479 	return NULL;
1480 }
1481 
1482 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483 {
1484 	struct tcphdr *th = tcp_hdr(skb);
1485 	const struct iphdr *iph = ip_hdr(skb);
1486 	struct sock *nsk;
1487 	struct request_sock **prev;
1488 	/* Find possible connection requests. */
1489 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1490 						       iph->saddr, iph->daddr);
1491 	if (req)
1492 		return tcp_check_req(sk, skb, req, prev);
1493 
1494 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1495 			th->source, iph->daddr, th->dest, inet_iif(skb));
1496 
1497 	if (nsk) {
1498 		if (nsk->sk_state != TCP_TIME_WAIT) {
1499 			bh_lock_sock(nsk);
1500 			return nsk;
1501 		}
1502 		inet_twsk_put(inet_twsk(nsk));
1503 		return NULL;
1504 	}
1505 
1506 #ifdef CONFIG_SYN_COOKIES
1507 	if (!th->rst && !th->syn && th->ack)
1508 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1509 #endif
1510 	return sk;
1511 }
1512 
1513 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1514 {
1515 	const struct iphdr *iph = ip_hdr(skb);
1516 
1517 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1518 		if (!tcp_v4_check(skb->len, iph->saddr,
1519 				  iph->daddr, skb->csum)) {
1520 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1521 			return 0;
1522 		}
1523 	}
1524 
1525 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1526 				       skb->len, IPPROTO_TCP, 0);
1527 
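	/* Short packets are verified right away; for longer ones the
	 * pseudo-header sum stored above lets full verification be deferred
	 * until the data is consumed (e.g. checksum-and-copy to user space).
	 */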
1528 	if (skb->len <= 76) {
1529 		return __skb_checksum_complete(skb);
1530 	}
1531 	return 0;
1532 }
1533 
1534 
1535 /* The socket must have its spinlock held when we get
1536  * here.
1537  *
1538  * We have a potential double-lock case here, so even when
1539  * doing backlog processing we use the BH locking scheme.
1540  * This is because we cannot sleep with the original spinlock
1541  * held.
1542  */
1543 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1544 {
1545 	struct sock *rsk;
1546 #ifdef CONFIG_TCP_MD5SIG
1547 	/*
1548 	 * We really want to reject the packet as early as possible
1549 	 * if:
1550 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1551 	 *  o There is an MD5 option and we're not expecting one
1552 	 */
1553 	if (tcp_v4_inbound_md5_hash(sk, skb))
1554 		goto discard;
1555 #endif
1556 
1557 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1558 		sock_rps_save_rxhash(sk, skb->rxhash);
1559 		TCP_CHECK_TIMER(sk);
1560 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1561 			rsk = sk;
1562 			goto reset;
1563 		}
1564 		TCP_CHECK_TIMER(sk);
1565 		return 0;
1566 	}
1567 
1568 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1569 		goto csum_err;
1570 
1571 	if (sk->sk_state == TCP_LISTEN) {
1572 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1573 		if (!nsk)
1574 			goto discard;
1575 
1576 		if (nsk != sk) {
1577 			if (tcp_child_process(sk, nsk, skb)) {
1578 				rsk = nsk;
1579 				goto reset;
1580 			}
1581 			return 0;
1582 		}
1583 	} else
1584 		sock_rps_save_rxhash(sk, skb->rxhash);
1585 
1586 
1587 	TCP_CHECK_TIMER(sk);
1588 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1589 		rsk = sk;
1590 		goto reset;
1591 	}
1592 	TCP_CHECK_TIMER(sk);
1593 	return 0;
1594 
1595 reset:
1596 	tcp_v4_send_reset(rsk, skb);
1597 discard:
1598 	kfree_skb(skb);
1599 	/* Be careful here. If this function gets more complicated and
1600 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1601 	 * might be destroyed here. This current version compiles correctly,
1602 	 * but you have been warned.
1603 	 */
1604 	return 0;
1605 
1606 csum_err:
1607 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1608 	goto discard;
1609 }
1610 
1611 /*
1612  *	From tcp_input.c
1613  */
1614 
1615 int tcp_v4_rcv(struct sk_buff *skb)
1616 {
1617 	const struct iphdr *iph;
1618 	struct tcphdr *th;
1619 	struct sock *sk;
1620 	int ret;
1621 	struct net *net = dev_net(skb->dev);
1622 
1623 	if (skb->pkt_type != PACKET_HOST)
1624 		goto discard_it;
1625 
1626 	/* Count it even if it's bad */
1627 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1628 
1629 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1630 		goto discard_it;
1631 
1632 	th = tcp_hdr(skb);
1633 
1634 	if (th->doff < sizeof(struct tcphdr) / 4)
1635 		goto bad_packet;
1636 	if (!pskb_may_pull(skb, th->doff * 4))
1637 		goto discard_it;
1638 
1639 	/* An explanation is required here, I think.
1640 	 * Packet length and doff are validated by header prediction,
1641 	 * provided the case of th->doff == 0 is eliminated.
1642 	 * So, we defer the checks. */
1643 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1644 		goto bad_packet;
1645 
1646 	th = tcp_hdr(skb);
1647 	iph = ip_hdr(skb);
1648 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1649 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1650 				    skb->len - th->doff * 4);
1651 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1652 	TCP_SKB_CB(skb)->when	 = 0;
1653 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1654 	TCP_SKB_CB(skb)->sacked	 = 0;
1655 
1656 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1657 	if (!sk)
1658 		goto no_tcp_socket;
1659 
1660 process:
1661 	if (sk->sk_state == TCP_TIME_WAIT)
1662 		goto do_time_wait;
1663 
1664 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1665 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1666 		goto discard_and_relse;
1667 	}
1668 
1669 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1670 		goto discard_and_relse;
1671 	nf_reset(skb);
1672 
1673 	if (sk_filter(sk, skb))
1674 		goto discard_and_relse;
1675 
1676 	skb->dev = NULL;
1677 
1678 	bh_lock_sock_nested(sk);
1679 	ret = 0;
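	/* If no user context holds the socket, process (or prequeue) the segment
	 * now; otherwise queue it on the backlog so the lock owner handles it
	 * when it releases the socket.
	 */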
1680 	if (!sock_owned_by_user(sk)) {
1681 #ifdef CONFIG_NET_DMA
1682 		struct tcp_sock *tp = tcp_sk(sk);
1683 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1684 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1685 		if (tp->ucopy.dma_chan)
1686 			ret = tcp_v4_do_rcv(sk, skb);
1687 		else
1688 #endif
1689 		{
1690 			if (!tcp_prequeue(sk, skb))
1691 				ret = tcp_v4_do_rcv(sk, skb);
1692 		}
1693 	} else if (unlikely(sk_add_backlog(sk, skb))) {
1694 		bh_unlock_sock(sk);
1695 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1696 		goto discard_and_relse;
1697 	}
1698 	bh_unlock_sock(sk);
1699 
1700 	sock_put(sk);
1701 
1702 	return ret;
1703 
1704 no_tcp_socket:
1705 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1706 		goto discard_it;
1707 
1708 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1709 bad_packet:
1710 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1711 	} else {
1712 		tcp_v4_send_reset(NULL, skb);
1713 	}
1714 
1715 discard_it:
1716 	/* Discard frame. */
1717 	kfree_skb(skb);
1718 	return 0;
1719 
1720 discard_and_relse:
1721 	sock_put(sk);
1722 	goto discard_it;
1723 
1724 do_time_wait:
1725 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1726 		inet_twsk_put(inet_twsk(sk));
1727 		goto discard_it;
1728 	}
1729 
1730 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1731 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1732 		inet_twsk_put(inet_twsk(sk));
1733 		goto discard_it;
1734 	}
1735 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1736 	case TCP_TW_SYN: {
1737 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1738 							&tcp_hashinfo,
1739 							iph->daddr, th->dest,
1740 							inet_iif(skb));
1741 		if (sk2) {
1742 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1743 			inet_twsk_put(inet_twsk(sk));
1744 			sk = sk2;
1745 			goto process;
1746 		}
1747 		/* Fall through to ACK */
1748 	}
1749 	case TCP_TW_ACK:
1750 		tcp_v4_timewait_ack(sk, skb);
1751 		break;
1752 	case TCP_TW_RST:
1753 		goto no_tcp_socket;
1754 	case TCP_TW_SUCCESS:;
1755 	}
1756 	goto discard_it;
1757 }
1758 
1759 /* VJ's idea. Save the last timestamp seen from this destination
1760  * and hold it for at least the normal timewait interval, to use for
1761  * duplicate segment detection in subsequent connections before they
1762  * enter the synchronized state.
1763  */
1764 
1765 int tcp_v4_remember_stamp(struct sock *sk)
1766 {
1767 	struct inet_sock *inet = inet_sk(sk);
1768 	struct tcp_sock *tp = tcp_sk(sk);
1769 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1770 	struct inet_peer *peer = NULL;
1771 	int release_it = 0;
1772 
1773 	if (!rt || rt->rt_dst != inet->inet_daddr) {
1774 		peer = inet_getpeer(inet->inet_daddr, 1);
1775 		release_it = 1;
1776 	} else {
1777 		if (!rt->peer)
1778 			rt_bind_peer(rt, 1);
1779 		peer = rt->peer;
1780 	}
1781 
1782 	if (peer) {
1783 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1784 		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1785 		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
1786 			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
1787 			peer->tcp_ts = tp->rx_opt.ts_recent;
1788 		}
1789 		if (release_it)
1790 			inet_putpeer(peer);
1791 		return 1;
1792 	}
1793 
1794 	return 0;
1795 }
1796 
1797 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1798 {
1799 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1800 
1801 	if (peer) {
1802 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1803 
1804 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1805 		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1806 		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
1807 			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
1808 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1809 		}
1810 		inet_putpeer(peer);
1811 		return 1;
1812 	}
1813 
1814 	return 0;
1815 }
1816 
1817 const struct inet_connection_sock_af_ops ipv4_specific = {
1818 	.queue_xmit	   = ip_queue_xmit,
1819 	.send_check	   = tcp_v4_send_check,
1820 	.rebuild_header	   = inet_sk_rebuild_header,
1821 	.conn_request	   = tcp_v4_conn_request,
1822 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1823 	.remember_stamp	   = tcp_v4_remember_stamp,
1824 	.net_header_len	   = sizeof(struct iphdr),
1825 	.setsockopt	   = ip_setsockopt,
1826 	.getsockopt	   = ip_getsockopt,
1827 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1828 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1829 	.bind_conflict	   = inet_csk_bind_conflict,
1830 #ifdef CONFIG_COMPAT
1831 	.compat_setsockopt = compat_ip_setsockopt,
1832 	.compat_getsockopt = compat_ip_getsockopt,
1833 #endif
1834 };
1835 
1836 #ifdef CONFIG_TCP_MD5SIG
1837 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1838 	.md5_lookup		= tcp_v4_md5_lookup,
1839 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1840 	.md5_add		= tcp_v4_md5_add_func,
1841 	.md5_parse		= tcp_v4_parse_md5_keys,
1842 };
1843 #endif
1844 
1845 /* NOTE: A lot of things are set to zero explicitly by the call to
1846  *       sk_alloc(), so they need not be done here.
1847  */
1848 static int tcp_v4_init_sock(struct sock *sk)
1849 {
1850 	struct inet_connection_sock *icsk = inet_csk(sk);
1851 	struct tcp_sock *tp = tcp_sk(sk);
1852 
1853 	skb_queue_head_init(&tp->out_of_order_queue);
1854 	tcp_init_xmit_timers(sk);
1855 	tcp_prequeue_init(tp);
1856 
1857 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1858 	tp->mdev = TCP_TIMEOUT_INIT;
1859 
1860 	/* So many TCP implementations out there (incorrectly) count the
1861 	 * initial SYN frame in their delayed-ACK and congestion control
1862 	 * algorithms that we must have the following bandaid to talk
1863 	 * efficiently to them.  -DaveM
1864 	 */
1865 	tp->snd_cwnd = 2;
1866 
1867 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1868 	 * initialization of these values.
1869 	 */
1870 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1871 	tp->snd_cwnd_clamp = ~0;
1872 	tp->mss_cache = TCP_MSS_DEFAULT;
1873 
1874 	tp->reordering = sysctl_tcp_reordering;
1875 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1876 
1877 	sk->sk_state = TCP_CLOSE;
1878 
1879 	sk->sk_write_space = sk_stream_write_space;
1880 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1881 
1882 	icsk->icsk_af_ops = &ipv4_specific;
1883 	icsk->icsk_sync_mss = tcp_sync_mss;
1884 #ifdef CONFIG_TCP_MD5SIG
1885 	tp->af_specific = &tcp_sock_ipv4_specific;
1886 #endif
1887 
1888 	/* TCP Cookie Transactions */
1889 	if (sysctl_tcp_cookie_size > 0) {
1890 		/* Default, cookies without s_data_payload. */
1891 		tp->cookie_values =
1892 			kzalloc(sizeof(*tp->cookie_values),
1893 				sk->sk_allocation);
1894 		if (tp->cookie_values != NULL)
1895 			kref_init(&tp->cookie_values->kref);
1896 	}
1897 	/* Presumed zeroed, in order of appearance:
1898 	 *	cookie_in_always, cookie_out_never,
1899 	 *	s_data_constant, s_data_in, s_data_out
1900 	 */
1901 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1902 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1903 
1904 	local_bh_disable();
1905 	percpu_counter_inc(&tcp_sockets_allocated);
1906 	local_bh_enable();
1907 
1908 	return 0;
1909 }
1910 
1911 void tcp_v4_destroy_sock(struct sock *sk)
1912 {
1913 	struct tcp_sock *tp = tcp_sk(sk);
1914 
1915 	tcp_clear_xmit_timers(sk);
1916 
1917 	tcp_cleanup_congestion_control(sk);
1918 
1919 	/* Clean up the write buffer. */
1920 	tcp_write_queue_purge(sk);
1921 
1922 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1923 	__skb_queue_purge(&tp->out_of_order_queue);
1924 
1925 #ifdef CONFIG_TCP_MD5SIG
1926 	/* Clean up the MD5 key list, if any */
1927 	if (tp->md5sig_info) {
1928 		tcp_v4_clear_md5_list(sk);
1929 		kfree(tp->md5sig_info);
1930 		tp->md5sig_info = NULL;
1931 	}
1932 #endif
1933 
1934 #ifdef CONFIG_NET_DMA
1935 	/* Cleans up our sk_async_wait_queue */
1936 	__skb_queue_purge(&sk->sk_async_wait_queue);
1937 #endif
1938 
1939 	/* Clean up the prequeue; it should already be empty. */
1940 	__skb_queue_purge(&tp->ucopy.prequeue);
1941 
1942 	/* Clean up a referenced TCP bind bucket. */
1943 	if (inet_csk(sk)->icsk_bind_hash)
1944 		inet_put_port(sk);
1945 
1946 	/*
1947 	 * If a cached sendmsg page exists, free it.
1948 	 */
1949 	if (sk->sk_sndmsg_page) {
1950 		__free_page(sk->sk_sndmsg_page);
1951 		sk->sk_sndmsg_page = NULL;
1952 	}
1953 
1954 	/* TCP Cookie Transactions */
1955 	if (tp->cookie_values != NULL) {
1956 		kref_put(&tp->cookie_values->kref,
1957 			 tcp_cookie_values_release);
1958 		tp->cookie_values = NULL;
1959 	}
1960 
1961 	percpu_counter_dec(&tcp_sockets_allocated);
1962 }
1963 
1964 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1965 
1966 #ifdef CONFIG_PROC_FS
1967 /* Proc filesystem TCP sock list dumping. */
1968 
1969 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1970 {
1971 	return hlist_nulls_empty(head) ? NULL :
1972 		hlist_nulls_entry(head->first, struct inet_timewait_sock, tw_node);
1973 }
1974 
1975 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1976 {
1977 	return !is_a_nulls(tw->tw_node.next) ?
1978 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1979 }
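
/* These helpers walk hlist_nulls chains: the chains end in an encoded
 * "nulls" value rather than a plain NULL (tested with is_a_nulls() above),
 * so lockless/RCU readers of the SLAB_DESTROY_BY_RCU socket tables can
 * notice that an entry was recycled onto a different chain and restart,
 * instead of silently walking the wrong bucket.
 */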
1980 
1981 static void *listening_get_next(struct seq_file *seq, void *cur)
1982 {
1983 	struct inet_connection_sock *icsk;
1984 	struct hlist_nulls_node *node;
1985 	struct sock *sk = cur;
1986 	struct inet_listen_hashbucket *ilb;
1987 	struct tcp_iter_state *st = seq->private;
1988 	struct net *net = seq_file_net(seq);
1989 
1990 	if (!sk) {
1991 		st->bucket = 0;
1992 		ilb = &tcp_hashinfo.listening_hash[0];
1993 		spin_lock_bh(&ilb->lock);
1994 		sk = sk_nulls_head(&ilb->head);
1995 		goto get_sk;
1996 	}
1997 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1998 	++st->num;
1999 
2000 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2001 		struct request_sock *req = cur;
2002 
2003 		icsk = inet_csk(st->syn_wait_sk);
2004 		req = req->dl_next;
2005 		while (1) {
2006 			while (req) {
2007 				if (req->rsk_ops->family == st->family) {
2008 					cur = req;
2009 					goto out;
2010 				}
2011 				req = req->dl_next;
2012 			}
2013 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2014 				break;
2015 get_req:
2016 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2017 		}
2018 		sk	  = sk_next(st->syn_wait_sk);
2019 		st->state = TCP_SEQ_STATE_LISTENING;
2020 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021 	} else {
2022 		icsk = inet_csk(sk);
2023 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2024 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2025 			goto start_req;
2026 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2027 		sk = sk_next(sk);
2028 	}
2029 get_sk:
2030 	sk_nulls_for_each_from(sk, node) {
2031 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
2032 			cur = sk;
2033 			goto out;
2034 		}
2035 		icsk = inet_csk(sk);
2036 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2037 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2038 start_req:
2039 			st->uid		= sock_i_uid(sk);
2040 			st->syn_wait_sk = sk;
2041 			st->state	= TCP_SEQ_STATE_OPENREQ;
2042 			st->sbucket	= 0;
2043 			goto get_req;
2044 		}
2045 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046 	}
2047 	spin_unlock_bh(&ilb->lock);
2048 	if (++st->bucket < INET_LHTABLE_SIZE) {
2049 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2050 		spin_lock_bh(&ilb->lock);
2051 		sk = sk_nulls_head(&ilb->head);
2052 		goto get_sk;
2053 	}
2054 	cur = NULL;
2055 out:
2056 	return cur;
2057 }
2058 
2059 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2060 {
2061 	void *rc = listening_get_next(seq, NULL);
2062 
2063 	while (rc && *pos) {
2064 		rc = listening_get_next(seq, rc);
2065 		--*pos;
2066 	}
2067 	return rc;
2068 }
2069 
2070 static inline int empty_bucket(struct tcp_iter_state *st)
2071 {
2072 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2073 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2074 }
2075 
2076 static void *established_get_first(struct seq_file *seq)
2077 {
2078 	struct tcp_iter_state *st = seq->private;
2079 	struct net *net = seq_file_net(seq);
2080 	void *rc = NULL;
2081 
2082 	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2083 		struct sock *sk;
2084 		struct hlist_nulls_node *node;
2085 		struct inet_timewait_sock *tw;
2086 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2087 
2088 		/* Lockless fast path for the common case of empty buckets */
2089 		if (empty_bucket(st))
2090 			continue;
2091 
2092 		spin_lock_bh(lock);
2093 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2094 			if (sk->sk_family != st->family ||
2095 			    !net_eq(sock_net(sk), net)) {
2096 				continue;
2097 			}
2098 			rc = sk;
2099 			goto out;
2100 		}
2101 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2102 		inet_twsk_for_each(tw, node,
2103 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2104 			if (tw->tw_family != st->family ||
2105 			    !net_eq(twsk_net(tw), net)) {
2106 				continue;
2107 			}
2108 			rc = tw;
2109 			goto out;
2110 		}
2111 		spin_unlock_bh(lock);
2112 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2113 	}
2114 out:
2115 	return rc;
2116 }
2117 
2118 static void *established_get_next(struct seq_file *seq, void *cur)
2119 {
2120 	struct sock *sk = cur;
2121 	struct inet_timewait_sock *tw;
2122 	struct hlist_nulls_node *node;
2123 	struct tcp_iter_state *st = seq->private;
2124 	struct net *net = seq_file_net(seq);
2125 
2126 	++st->num;
2127 
2128 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2129 		tw = cur;
2130 		tw = tw_next(tw);
2131 get_tw:
2132 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2133 			tw = tw_next(tw);
2134 		}
2135 		if (tw) {
2136 			cur = tw;
2137 			goto out;
2138 		}
2139 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2140 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2141 
2142 		/* Look for the next non-empty bucket */
2143 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2144 				empty_bucket(st))
2145 			;
2146 		if (st->bucket > tcp_hashinfo.ehash_mask)
2147 			return NULL;
2148 
2149 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2150 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2151 	} else
2152 		sk = sk_nulls_next(sk);
2153 
2154 	sk_nulls_for_each_from(sk, node) {
2155 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2156 			goto found;
2157 	}
2158 
2159 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2160 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2161 	goto get_tw;
2162 found:
2163 	cur = sk;
2164 out:
2165 	return cur;
2166 }
2167 
2168 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2169 {
2170 	void *rc = established_get_first(seq);
2171 
2172 	while (rc && pos) {
2173 		rc = established_get_next(seq, rc);
2174 		--pos;
2175 	}
2176 	return rc;
2177 }
2178 
2179 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2180 {
2181 	void *rc;
2182 	struct tcp_iter_state *st = seq->private;
2183 
2184 	st->state = TCP_SEQ_STATE_LISTENING;
2185 	rc	  = listening_get_idx(seq, &pos);
2186 
2187 	if (!rc) {
2188 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2189 		rc	  = established_get_idx(seq, pos);
2190 	}
2191 
2192 	return rc;
2193 }
2194 
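/* tcp_seq_start(), tcp_seq_next() and tcp_seq_stop() below, together with
 * the per-family show() routine (tcp4_seq_show() further down), implement
 * the usual seq_file iterator contract: start() positions the cursor (or
 * returns SEQ_START_TOKEN for the header line), next() advances it, show()
 * prints one record and stop() drops whatever lock the walk currently
 * holds.  st->state tracks which table (listening, open-request,
 * established or time-wait) the cursor is in, so tcp_seq_stop() can
 * release the matching lock.
 */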
2195 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2196 {
2197 	struct tcp_iter_state *st = seq->private;
2198 	st->state = TCP_SEQ_STATE_LISTENING;
2199 	st->num = 0;
2200 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2201 }
2202 
2203 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2204 {
2205 	void *rc = NULL;
2206 	struct tcp_iter_state *st;
2207 
2208 	if (v == SEQ_START_TOKEN) {
2209 		rc = tcp_get_idx(seq, 0);
2210 		goto out;
2211 	}
2212 	st = seq->private;
2213 
2214 	switch (st->state) {
2215 	case TCP_SEQ_STATE_OPENREQ:
2216 	case TCP_SEQ_STATE_LISTENING:
2217 		rc = listening_get_next(seq, v);
2218 		if (!rc) {
2219 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2220 			rc	  = established_get_first(seq);
2221 		}
2222 		break;
2223 	case TCP_SEQ_STATE_ESTABLISHED:
2224 	case TCP_SEQ_STATE_TIME_WAIT:
2225 		rc = established_get_next(seq, v);
2226 		break;
2227 	}
2228 out:
2229 	++*pos;
2230 	return rc;
2231 }
2232 
2233 static void tcp_seq_stop(struct seq_file *seq, void *v)
2234 {
2235 	struct tcp_iter_state *st = seq->private;
2236 
2237 	switch (st->state) {
2238 	case TCP_SEQ_STATE_OPENREQ:
2239 		if (v) {
2240 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2241 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2242 		}
		/* fall through */
2243 	case TCP_SEQ_STATE_LISTENING:
2244 		if (v != SEQ_START_TOKEN)
2245 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2246 		break;
2247 	case TCP_SEQ_STATE_TIME_WAIT:
2248 	case TCP_SEQ_STATE_ESTABLISHED:
2249 		if (v)
2250 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2251 		break;
2252 	}
2253 }
2254 
2255 static int tcp_seq_open(struct inode *inode, struct file *file)
2256 {
2257 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2258 	struct tcp_iter_state *s;
2259 	int err;
2260 
2261 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2262 			  sizeof(struct tcp_iter_state));
2263 	if (err < 0)
2264 		return err;
2265 
2266 	s = ((struct seq_file *)file->private_data)->private;
2267 	s->family		= afinfo->family;
2268 	return 0;
2269 }
2270 
2271 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2272 {
2273 	int rc = 0;
2274 	struct proc_dir_entry *p;
2275 
2276 	afinfo->seq_fops.open		= tcp_seq_open;
2277 	afinfo->seq_fops.read		= seq_read;
2278 	afinfo->seq_fops.llseek		= seq_lseek;
2279 	afinfo->seq_fops.release	= seq_release_net;
2280 
2281 	afinfo->seq_ops.start		= tcp_seq_start;
2282 	afinfo->seq_ops.next		= tcp_seq_next;
2283 	afinfo->seq_ops.stop		= tcp_seq_stop;
2284 
2285 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2286 			     &afinfo->seq_fops, afinfo);
2287 	if (!p)
2288 		rc = -ENOMEM;
2289 	return rc;
2290 }
2291 
2292 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2293 {
2294 	proc_net_remove(net, afinfo->name);
2295 }
2296 
2297 static void get_openreq4(struct sock *sk, struct request_sock *req,
2298 			 struct seq_file *f, int i, int uid, int *len)
2299 {
2300 	const struct inet_request_sock *ireq = inet_rsk(req);
2301 	int ttd = req->expires - jiffies;
2302 
2303 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2304 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2305 		i,
2306 		ireq->loc_addr,
2307 		ntohs(inet_sk(sk)->inet_sport),
2308 		ireq->rmt_addr,
2309 		ntohs(ireq->rmt_port),
2310 		TCP_SYN_RECV,
2311 		0, 0, /* could print option size, but that is af dependent. */
2312 		1,    /* timers active (only the expire timer) */
2313 		jiffies_to_clock_t(ttd),
2314 		req->retrans,
2315 		uid,
2316 		0,  /* non standard timer */
2317 		0, /* open_requests have no inode */
2318 		atomic_read(&sk->sk_refcnt),
2319 		req,
2320 		len);
2321 }
2322 
2323 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2324 {
2325 	int timer_active;
2326 	unsigned long timer_expires;
2327 	struct tcp_sock *tp = tcp_sk(sk);
2328 	const struct inet_connection_sock *icsk = inet_csk(sk);
2329 	struct inet_sock *inet = inet_sk(sk);
2330 	__be32 dest = inet->inet_daddr;
2331 	__be32 src = inet->inet_rcv_saddr;
2332 	__u16 destp = ntohs(inet->inet_dport);
2333 	__u16 srcp = ntohs(inet->inet_sport);
2334 	int rx_queue;
2335 
2336 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2337 		timer_active	= 1;
2338 		timer_expires	= icsk->icsk_timeout;
2339 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2340 		timer_active	= 4;
2341 		timer_expires	= icsk->icsk_timeout;
2342 	} else if (timer_pending(&sk->sk_timer)) {
2343 		timer_active	= 2;
2344 		timer_expires	= sk->sk_timer.expires;
2345 	} else {
2346 		timer_active	= 0;
2347 		timer_expires	= jiffies;
2348 	}
2349 
2350 	if (sk->sk_state == TCP_LISTEN)
2351 		rx_queue = sk->sk_ack_backlog;
2352 	else
2353 		/*
2354 		 * Because we don't lock the socket, we might find a transient negative value
2355 		 */
2356 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2357 
2358 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2359 			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2360 		i, src, srcp, dest, destp, sk->sk_state,
2361 		tp->write_seq - tp->snd_una,
2362 		rx_queue,
2363 		timer_active,
2364 		jiffies_to_clock_t(timer_expires - jiffies),
2365 		icsk->icsk_retransmits,
2366 		sock_i_uid(sk),
2367 		icsk->icsk_probes_out,
2368 		sock_i_ino(sk),
2369 		atomic_read(&sk->sk_refcnt), sk,
2370 		jiffies_to_clock_t(icsk->icsk_rto),
2371 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2372 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2373 		tp->snd_cwnd,
2374 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2375 		len);
2376 }
2377 
2378 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2379 			       struct seq_file *f, int i, int *len)
2380 {
2381 	__be32 dest, src;
2382 	__u16 destp, srcp;
2383 	int ttd = tw->tw_ttd - jiffies;
2384 
2385 	if (ttd < 0)
2386 		ttd = 0;
2387 
2388 	dest  = tw->tw_daddr;
2389 	src   = tw->tw_rcv_saddr;
2390 	destp = ntohs(tw->tw_dport);
2391 	srcp  = ntohs(tw->tw_sport);
2392 
2393 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2394 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2395 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2396 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2397 		atomic_read(&tw->tw_refcnt), tw, len);
2398 }
2399 
2400 #define TMPSZ 150
2401 
2402 static int tcp4_seq_show(struct seq_file *seq, void *v)
2403 {
2404 	struct tcp_iter_state *st;
2405 	int len;
2406 
2407 	if (v == SEQ_START_TOKEN) {
2408 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2409 			   "  sl  local_address rem_address   st tx_queue "
2410 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2411 			   "inode");
2412 		goto out;
2413 	}
2414 	st = seq->private;
2415 
2416 	switch (st->state) {
2417 	case TCP_SEQ_STATE_LISTENING:
2418 	case TCP_SEQ_STATE_ESTABLISHED:
2419 		get_tcp4_sock(v, seq, st->num, &len);
2420 		break;
2421 	case TCP_SEQ_STATE_OPENREQ:
2422 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2423 		break;
2424 	case TCP_SEQ_STATE_TIME_WAIT:
2425 		get_timewait4_sock(v, seq, st->num, &len);
2426 		break;
2427 	}
2428 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2429 out:
2430 	return 0;
2431 }
2432 
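/* Worked example (values illustrative): the address columns above are the
 * raw __be32 words printed with %08X, so on a little-endian machine
 * 127.0.0.1 appears byte-swapped as 0100007F, while ports are converted
 * with ntohs() first and print in natural hex order, e.g. 22 as 0016, so
 * 127.0.0.1:22 shows up as "0100007F:0016".
 */
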
2433 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2434 	.name		= "tcp",
2435 	.family		= AF_INET,
2436 	.seq_fops	= {
2437 		.owner		= THIS_MODULE,
2438 	},
2439 	.seq_ops	= {
2440 		.show		= tcp4_seq_show,
2441 	},
2442 };
2443 
2444 static int __net_init tcp4_proc_init_net(struct net *net)
2445 {
2446 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2447 }
2448 
2449 static void __net_exit tcp4_proc_exit_net(struct net *net)
2450 {
2451 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2452 }
2453 
2454 static struct pernet_operations tcp4_net_ops = {
2455 	.init = tcp4_proc_init_net,
2456 	.exit = tcp4_proc_exit_net,
2457 };
2458 
2459 int __init tcp4_proc_init(void)
2460 {
2461 	return register_pernet_subsys(&tcp4_net_ops);
2462 }
2463 
2464 void tcp4_proc_exit(void)
2465 {
2466 	unregister_pernet_subsys(&tcp4_net_ops);
2467 }
2468 #endif /* CONFIG_PROC_FS */
2469 
2470 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2471 {
2472 	struct iphdr *iph = skb_gro_network_header(skb);
2473 
2474 	switch (skb->ip_summed) {
2475 	case CHECKSUM_COMPLETE:
2476 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2477 				  skb->csum)) {
2478 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2479 			break;
2480 		}
2481 
2482 		/* fall through */
2483 	case CHECKSUM_NONE:
2484 		NAPI_GRO_CB(skb)->flush = 1;
2485 		return NULL;
2486 	}
2487 
2488 	return tcp_gro_receive(head, skb);
2489 }
2490 EXPORT_SYMBOL(tcp4_gro_receive);
2491 
2492 int tcp4_gro_complete(struct sk_buff *skb)
2493 {
2494 	struct iphdr *iph = ip_hdr(skb);
2495 	struct tcphdr *th = tcp_hdr(skb);
2496 
2497 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2498 				  iph->saddr, iph->daddr, 0);
2499 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2500 
2501 	return tcp_gro_complete(skb);
2502 }
2503 EXPORT_SYMBOL(tcp4_gro_complete);
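
/* GRO checksum handling above: tcp4_gro_receive() lets a segment into
 * tcp_gro_receive() only if its checksum is already trusted
 * (CHECKSUM_UNNECESSARY) or the CHECKSUM_COMPLETE value verifies against
 * the IPv4 pseudo-header via tcp_v4_check(); otherwise it sets ->flush so
 * the packet is passed up unmerged.  tcp4_gro_complete() then seeds
 * th->check with the complement of the pseudo-header sum and marks the
 * skb SKB_GSO_TCPV4, so later segmentation/checksum offload only needs to
 * fold the payload back in, mirroring the transmit-side tcp_v4_send_check().
 */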
2504 
2505 struct proto tcp_prot = {
2506 	.name			= "TCP",
2507 	.owner			= THIS_MODULE,
2508 	.close			= tcp_close,
2509 	.connect		= tcp_v4_connect,
2510 	.disconnect		= tcp_disconnect,
2511 	.accept			= inet_csk_accept,
2512 	.ioctl			= tcp_ioctl,
2513 	.init			= tcp_v4_init_sock,
2514 	.destroy		= tcp_v4_destroy_sock,
2515 	.shutdown		= tcp_shutdown,
2516 	.setsockopt		= tcp_setsockopt,
2517 	.getsockopt		= tcp_getsockopt,
2518 	.recvmsg		= tcp_recvmsg,
2519 	.backlog_rcv		= tcp_v4_do_rcv,
2520 	.hash			= inet_hash,
2521 	.unhash			= inet_unhash,
2522 	.get_port		= inet_csk_get_port,
2523 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2524 	.sockets_allocated	= &tcp_sockets_allocated,
2525 	.orphan_count		= &tcp_orphan_count,
2526 	.memory_allocated	= &tcp_memory_allocated,
2527 	.memory_pressure	= &tcp_memory_pressure,
2528 	.sysctl_mem		= sysctl_tcp_mem,
2529 	.sysctl_wmem		= sysctl_tcp_wmem,
2530 	.sysctl_rmem		= sysctl_tcp_rmem,
2531 	.max_header		= MAX_TCP_HEADER,
2532 	.obj_size		= sizeof(struct tcp_sock),
2533 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2534 	.twsk_prot		= &tcp_timewait_sock_ops,
2535 	.rsk_prot		= &tcp_request_sock_ops,
2536 	.h.hashinfo		= &tcp_hashinfo,
2537 #ifdef CONFIG_COMPAT
2538 	.compat_setsockopt	= compat_tcp_setsockopt,
2539 	.compat_getsockopt	= compat_tcp_getsockopt,
2540 #endif
2541 };
2542 
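/* tcp_prot is registered elsewhere: inet_init() in net/ipv4/af_inet.c
 * calls proto_register(&tcp_prot, 1) and hooks it into the AF_INET
 * protosw table, which is how the .init/.destroy callbacks above
 * (tcp_v4_init_sock()/tcp_v4_destroy_sock()) end up running for every
 * IPv4 SOCK_STREAM socket.
 */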
2543 
2544 static int __net_init tcp_sk_init(struct net *net)
2545 {
2546 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2547 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2548 }
2549 
2550 static void __net_exit tcp_sk_exit(struct net *net)
2551 {
2552 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2553 }
2554 
2555 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2556 {
2557 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2558 }
2559 
2560 static struct pernet_operations __net_initdata tcp_sk_ops = {
2561 	.init	   = tcp_sk_init,
2562 	.exit	   = tcp_sk_exit,
2563 	.exit_batch = tcp_sk_exit_batch,
2564 };
2565 
2566 void __init tcp_v4_init(void)
2567 {
2568 	inet_hashinfo_init(&tcp_hashinfo);
2569 	if (register_pernet_subsys(&tcp_sk_ops))
2570 		panic("Failed to create the TCP control socket.\n");
2571 }
2572 
2573 EXPORT_SYMBOL(ipv4_specific);
2574 EXPORT_SYMBOL(tcp_hashinfo);
2575 EXPORT_SYMBOL(tcp_prot);
2576 EXPORT_SYMBOL(tcp_v4_conn_request);
2577 EXPORT_SYMBOL(tcp_v4_connect);
2578 EXPORT_SYMBOL(tcp_v4_do_rcv);
2579 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2580 EXPORT_SYMBOL(tcp_v4_send_check);
2581 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2582 
2583 #ifdef CONFIG_PROC_FS
2584 EXPORT_SYMBOL(tcp_proc_register);
2585 EXPORT_SYMBOL(tcp_proc_unregister);
2586 #endif
2587 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2588 
2589