xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision baa7eb025ab14f3cba2e35c0a8648f9c9f01d24f)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					to a single port at the same time.
51  */
52 
53 
54 #include <linux/bottom_half.h>
55 #include <linux/types.h>
56 #include <linux/fcntl.h>
57 #include <linux/module.h>
58 #include <linux/random.h>
59 #include <linux/cache.h>
60 #include <linux/jhash.h>
61 #include <linux/init.h>
62 #include <linux/times.h>
63 #include <linux/slab.h>
64 
65 #include <net/net_namespace.h>
66 #include <net/icmp.h>
67 #include <net/inet_hashtables.h>
68 #include <net/tcp.h>
69 #include <net/transp_v6.h>
70 #include <net/ipv6.h>
71 #include <net/inet_common.h>
72 #include <net/timewait_sock.h>
73 #include <net/xfrm.h>
74 #include <net/netdma.h>
75 
76 #include <linux/inet.h>
77 #include <linux/ipv6.h>
78 #include <linux/stddef.h>
79 #include <linux/proc_fs.h>
80 #include <linux/seq_file.h>
81 
82 #include <linux/crypto.h>
83 #include <linux/scatterlist.h>
84 
85 int sysctl_tcp_tw_reuse __read_mostly;
86 int sysctl_tcp_low_latency __read_mostly;
87 EXPORT_SYMBOL(sysctl_tcp_low_latency);
88 
89 
90 #ifdef CONFIG_TCP_MD5SIG
91 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
92 						   __be32 addr);
93 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
94 			       __be32 daddr, __be32 saddr, struct tcphdr *th);
95 #else
96 static inline
97 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
98 {
99 	return NULL;
100 }
101 #endif
102 
103 struct inet_hashinfo tcp_hashinfo;
104 EXPORT_SYMBOL(tcp_hashinfo);
105 
106 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
107 {
108 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
109 					  ip_hdr(skb)->saddr,
110 					  tcp_hdr(skb)->dest,
111 					  tcp_hdr(skb)->source);
112 }
113 
114 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
115 {
116 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
117 	struct tcp_sock *tp = tcp_sk(sk);
118 
119 	/* With PAWS, it is safe from the viewpoint
120 	   of data integrity. Even without PAWS it is safe provided sequence
121 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
122 
123 	   Actually, the idea is close to VJ's, only the timestamp cache is
124 	   held not per host but per port pair, and the TW bucket is used as the
125 	   state holder.
126 
127 	   If TW bucket has been already destroyed we fall back to VJ's scheme
128 	   and use initial timestamp retrieved from peer table.
129 	 */
130 	if (tcptw->tw_ts_recent_stamp &&
131 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
132 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
133 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
134 		if (tp->write_seq == 0)
135 			tp->write_seq = 1;
136 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
137 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
138 		sock_hold(sktw);
139 		return 1;
140 	}
141 
142 	return 0;
143 }
144 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
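/*
 * The reuse path above only fires when the tcp_tw_reuse sysctl is enabled
 * and the timestamp cached in the TIME-WAIT bucket is more than one second
 * old.  A minimal userspace sketch for flipping that knob at runtime; it
 * assumes the usual procfs mount point and is illustrative only, not part
 * of this file:
 *
 *	#include <stdio.h>
 *
 *	int enable_tcp_tw_reuse(void)
 *	{
 *		FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "w");
 *
 *		if (!f)
 *			return -1;
 *		fputs("1", f);
 *		return fclose(f);
 *	}
 */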
145 
146 /* This will initiate an outgoing connection. */
147 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148 {
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct tcp_sock *tp = tcp_sk(sk);
151 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
152 	struct rtable *rt;
153 	__be32 daddr, nexthop;
154 	int tmp;
155 	int err;
156 
157 	if (addr_len < sizeof(struct sockaddr_in))
158 		return -EINVAL;
159 
160 	if (usin->sin_family != AF_INET)
161 		return -EAFNOSUPPORT;
162 
163 	nexthop = daddr = usin->sin_addr.s_addr;
164 	if (inet->opt && inet->opt->srr) {
165 		if (!daddr)
166 			return -EINVAL;
167 		nexthop = inet->opt->faddr;
168 	}
169 
170 	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
171 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
172 			       IPPROTO_TCP,
173 			       inet->inet_sport, usin->sin_port, sk, 1);
174 	if (tmp < 0) {
175 		if (tmp == -ENETUNREACH)
176 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
177 		return tmp;
178 	}
179 
180 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
181 		ip_rt_put(rt);
182 		return -ENETUNREACH;
183 	}
184 
185 	if (!inet->opt || !inet->opt->srr)
186 		daddr = rt->rt_dst;
187 
188 	if (!inet->inet_saddr)
189 		inet->inet_saddr = rt->rt_src;
190 	inet->inet_rcv_saddr = inet->inet_saddr;
191 
192 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
193 		/* Reset inherited state */
194 		tp->rx_opt.ts_recent	   = 0;
195 		tp->rx_opt.ts_recent_stamp = 0;
196 		tp->write_seq		   = 0;
197 	}
198 
199 	if (tcp_death_row.sysctl_tw_recycle &&
200 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
201 		struct inet_peer *peer = rt_get_peer(rt);
202 		/*
203 		 * VJ's idea. We save the last timestamp seen from
204 		 * the destination in the peer table, when entering
205 		 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
206 		 * when trying a new connection.
207 		 */
208 		if (peer) {
209 			inet_peer_refcheck(peer);
210 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
211 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
212 				tp->rx_opt.ts_recent = peer->tcp_ts;
213 			}
214 		}
215 	}
216 
217 	inet->inet_dport = usin->sin_port;
218 	inet->inet_daddr = daddr;
219 
220 	inet_csk(sk)->icsk_ext_hdr_len = 0;
221 	if (inet->opt)
222 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
223 
224 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
225 
226 	/* Socket identity is still unknown (sport may be zero).
227 	 * However we set state to SYN-SENT and, without releasing the socket
228 	 * lock, select a source port, enter ourselves into the hash tables and
229 	 * complete initialization after this.
230 	 */
231 	tcp_set_state(sk, TCP_SYN_SENT);
232 	err = inet_hash_connect(&tcp_death_row, sk);
233 	if (err)
234 		goto failure;
235 
236 	err = ip_route_newports(&rt, IPPROTO_TCP,
237 				inet->inet_sport, inet->inet_dport, sk);
238 	if (err)
239 		goto failure;
240 
241 	/* OK, now commit destination to socket.  */
242 	sk->sk_gso_type = SKB_GSO_TCPV4;
243 	sk_setup_caps(sk, &rt->dst);
244 
245 	if (!tp->write_seq)
246 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
247 							   inet->inet_daddr,
248 							   inet->inet_sport,
249 							   usin->sin_port);
250 
251 	inet->inet_id = tp->write_seq ^ jiffies;
252 
253 	err = tcp_connect(sk);
254 	rt = NULL;
255 	if (err)
256 		goto failure;
257 
258 	return 0;
259 
260 failure:
261 	/*
262 	 * This unhashes the socket and releases the local port,
263 	 * if necessary.
264 	 */
265 	tcp_set_state(sk, TCP_CLOSE);
266 	ip_rt_put(rt);
267 	sk->sk_route_caps = 0;
268 	inet->inet_dport = 0;
269 	return err;
270 }
271 EXPORT_SYMBOL(tcp_v4_connect);
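/*
 * For reference, the path above is what services an ordinary connect(2) on
 * an AF_INET stream socket.  A minimal userspace sketch (the destination
 * address and port below are placeholders):
 *
 *	#include <arpa/inet.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int tcp_connect_example(void)
 *	{
 *		struct sockaddr_in dst = { .sin_family = AF_INET };
 *		int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		dst.sin_port = htons(80);
 *		inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *		if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return fd;
 *	}
 */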
272 
273 /*
274  * This routine does path mtu discovery as defined in RFC1191.
275  */
276 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
277 {
278 	struct dst_entry *dst;
279 	struct inet_sock *inet = inet_sk(sk);
280 
281 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
282 	 * sent out by Linux are always < 576 bytes so they should go through
283 	 * unfragmented).
284 	 */
285 	if (sk->sk_state == TCP_LISTEN)
286 		return;
287 
288 	/* We don't check in the dst entry if pmtu discovery is forbidden
289 	 * on this route. We just assume that no packet-too-big packets
290 	 * are sent back when pmtu discovery is not active.
291 	 * There is a small race when the user changes this flag in the
292 	 * route, but I think that's acceptable.
293 	 */
294 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
295 		return;
296 
297 	dst->ops->update_pmtu(dst, mtu);
298 
299 	/* Something is about to go wrong... Remember the soft error
300 	 * in case this connection is not able to recover.
301 	 */
302 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
303 		sk->sk_err_soft = EMSGSIZE;
304 
305 	mtu = dst_mtu(dst);
306 
307 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
308 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
309 		tcp_sync_mss(sk, mtu);
310 
311 		/* Resend the TCP packet because it's
312 		 * clear that the old packet has been
313 		 * dropped. This is the new "fast" path mtu
314 		 * discovery.
315 		 */
316 		tcp_simple_retransmit(sk);
317 	} /* else let the usual retransmit timer handle it */
318 }
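/*
 * Whether the stack performs this discovery at all is also per-socket
 * policy.  A minimal userspace sketch that forces DF-based discovery and
 * reads back the kernel's current path MTU estimate; IP_MTU is only
 * meaningful on a connected socket, and the option names are the standard
 * ones from <netinet/in.h>:
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int force_pmtu_discovery(int fd, int *pmtu)
 *	{
 *		int val = IP_PMTUDISC_DO;
 *		socklen_t len = sizeof(*pmtu);
 *
 *		if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
 *			       &val, sizeof(val)) < 0)
 *			return -1;
 *		return getsockopt(fd, IPPROTO_IP, IP_MTU, pmtu, &len);
 *	}
 */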
319 
320 /*
321  * This routine is called by the ICMP module when it gets some
322  * sort of error condition.  If err < 0 then the socket should
323  * be closed and the error returned to the user.  If err > 0
324  * it's just the icmp type << 8 | icmp code.  After adjustment
325  * header points to the first 8 bytes of the tcp header.  We need
326  * to find the appropriate port.
327  *
328  * The locking strategy used here is very "optimistic". When
329  * someone else accesses the socket the ICMP is just dropped
330  * and for some paths there is no check at all.
331  * A more general error queue to queue errors for later handling
332  * is probably better.
333  *
334  */
335 
336 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
337 {
338 	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
339 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
340 	struct inet_connection_sock *icsk;
341 	struct tcp_sock *tp;
342 	struct inet_sock *inet;
343 	const int type = icmp_hdr(icmp_skb)->type;
344 	const int code = icmp_hdr(icmp_skb)->code;
345 	struct sock *sk;
346 	struct sk_buff *skb;
347 	__u32 seq;
348 	__u32 remaining;
349 	int err;
350 	struct net *net = dev_net(icmp_skb->dev);
351 
352 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
353 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
354 		return;
355 	}
356 
357 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
358 			iph->saddr, th->source, inet_iif(icmp_skb));
359 	if (!sk) {
360 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
361 		return;
362 	}
363 	if (sk->sk_state == TCP_TIME_WAIT) {
364 		inet_twsk_put(inet_twsk(sk));
365 		return;
366 	}
367 
368 	bh_lock_sock(sk);
369 	/* If too many ICMPs get dropped on busy
370 	 * servers this needs to be solved differently.
371 	 */
372 	if (sock_owned_by_user(sk))
373 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
374 
375 	if (sk->sk_state == TCP_CLOSE)
376 		goto out;
377 
378 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
379 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
380 		goto out;
381 	}
382 
383 	icsk = inet_csk(sk);
384 	tp = tcp_sk(sk);
385 	seq = ntohl(th->seq);
386 	if (sk->sk_state != TCP_LISTEN &&
387 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
388 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
389 		goto out;
390 	}
391 
392 	switch (type) {
393 	case ICMP_SOURCE_QUENCH:
394 		/* Just silently ignore these. */
395 		goto out;
396 	case ICMP_PARAMETERPROB:
397 		err = EPROTO;
398 		break;
399 	case ICMP_DEST_UNREACH:
400 		if (code > NR_ICMP_UNREACH)
401 			goto out;
402 
403 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
404 			if (!sock_owned_by_user(sk))
405 				do_pmtu_discovery(sk, iph, info);
406 			goto out;
407 		}
408 
409 		err = icmp_err_convert[code].errno;
410 		/* check if icmp_skb allows revert of backoff
411 		 * (see draft-zimmermann-tcp-lcd) */
412 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
413 			break;
414 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
415 		    !icsk->icsk_backoff)
416 			break;
417 
418 		if (sock_owned_by_user(sk))
419 			break;
420 
421 		icsk->icsk_backoff--;
422 		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
423 					 icsk->icsk_backoff;
424 		tcp_bound_rto(sk);
425 
426 		skb = tcp_write_queue_head(sk);
427 		BUG_ON(!skb);
428 
429 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
430 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
431 
432 		if (remaining) {
433 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
434 						  remaining, TCP_RTO_MAX);
435 		} else {
436 			/* RTO revert clocked out retransmission.
437 			 * Will retransmit now */
438 			tcp_retransmit_timer(sk);
439 		}
440 
441 		break;
442 	case ICMP_TIME_EXCEEDED:
443 		err = EHOSTUNREACH;
444 		break;
445 	default:
446 		goto out;
447 	}
448 
449 	switch (sk->sk_state) {
450 		struct request_sock *req, **prev;
451 	case TCP_LISTEN:
452 		if (sock_owned_by_user(sk))
453 			goto out;
454 
455 		req = inet_csk_search_req(sk, &prev, th->dest,
456 					  iph->daddr, iph->saddr);
457 		if (!req)
458 			goto out;
459 
460 		/* ICMPs are not backlogged, hence we cannot get
461 		   an established socket here.
462 		 */
463 		WARN_ON(req->sk);
464 
465 		if (seq != tcp_rsk(req)->snt_isn) {
466 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
467 			goto out;
468 		}
469 
470 		/*
471 		 * Still in SYN_RECV, just remove it silently.
472 		 * There is no good way to pass the error to the newly
473 		 * created socket, and POSIX does not want network
474 		 * errors returned from accept().
475 		 */
476 		inet_csk_reqsk_queue_drop(sk, req, prev);
477 		goto out;
478 
479 	case TCP_SYN_SENT:
480 	case TCP_SYN_RECV:  /* Cannot happen.
481 			       It can happen, however, e.g. if SYNs crossed.
482 			     */
483 		if (!sock_owned_by_user(sk)) {
484 			sk->sk_err = err;
485 
486 			sk->sk_error_report(sk);
487 
488 			tcp_done(sk);
489 		} else {
490 			sk->sk_err_soft = err;
491 		}
492 		goto out;
493 	}
494 
495 	/* If we've already connected we will keep trying
496 	 * until we time out, or the user gives up.
497 	 *
498 	 * rfc1122 4.2.3.9 allows us to treat as hard errors
499 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
500 	 * but it is obsoleted by pmtu discovery).
501 	 *
502 	 * Note that in the modern internet, where routing is unreliable
503 	 * and broken firewalls sit in every dark corner sending random
504 	 * errors as ordered by their masters, even these two messages have
505 	 * finally lost their original sense (even Linux sends invalid PORT_UNREACHs).
506 	 *
507 	 * Now we are in compliance with RFCs.
508 	 *							--ANK (980905)
509 	 */
510 
511 	inet = inet_sk(sk);
512 	if (!sock_owned_by_user(sk) && inet->recverr) {
513 		sk->sk_err = err;
514 		sk->sk_error_report(sk);
515 	} else	{ /* Only an error on timeout */
516 		sk->sk_err_soft = err;
517 	}
518 
519 out:
520 	bh_unlock_sock(sk);
521 	sock_put(sk);
522 }
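/*
 * The hard and soft errors recorded above reach applications through the
 * usual channels: sk_err is reported by the next socket call or by
 * getsockopt(SO_ERROR), while IP_RECVERR makes the offending ICMP available
 * on the error queue.  A minimal sketch of the polling side; a return value
 * of zero means no error is currently pending:
 *
 *	#include <sys/socket.h>
 *
 *	int pending_socket_error(int fd)
 *	{
 *		int err = 0;
 *		socklen_t len = sizeof(err);
 *
 *		if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) < 0)
 *			return -1;
 *		return err;
 *	}
 */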
523 
524 static void __tcp_v4_send_check(struct sk_buff *skb,
525 				__be32 saddr, __be32 daddr)
526 {
527 	struct tcphdr *th = tcp_hdr(skb);
528 
529 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
530 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
531 		skb->csum_start = skb_transport_header(skb) - skb->head;
532 		skb->csum_offset = offsetof(struct tcphdr, check);
533 	} else {
534 		th->check = tcp_v4_check(skb->len, saddr, daddr,
535 					 csum_partial(th,
536 						      th->doff << 2,
537 						      skb->csum));
538 	}
539 }
540 
541 /* This routine computes an IPv4 TCP checksum. */
542 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
543 {
544 	struct inet_sock *inet = inet_sk(sk);
545 
546 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
547 }
548 EXPORT_SYMBOL(tcp_v4_send_check);
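/*
 * For reference, the checksum set up here is the standard ones' complement
 * sum over the IPv4 pseudo-header (source address, destination address, a
 * zero byte, the protocol number and the TCP length) followed by the TCP
 * header and payload.  A plain C sketch of the same arithmetic, operating
 * on buffers in network byte order with the check field zeroed; the result
 * is a host-order value that would be stored big-endian in th->check
 * (illustrative only, the kernel uses the optimized csum helpers above):
 *
 *	#include <netinet/in.h>
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static uint32_t sum_be16(const uint8_t *p, size_t len, uint32_t sum)
 *	{
 *		size_t i;
 *
 *		for (i = 0; i + 1 < len; i += 2)
 *			sum += ((uint32_t)p[i] << 8) | p[i + 1];
 *		if (len & 1)
 *			sum += (uint32_t)p[len - 1] << 8;
 *		return sum;
 *	}
 *
 *	uint16_t tcp4_csum(const uint8_t saddr[4], const uint8_t daddr[4],
 *			   const uint8_t *seg, size_t len)
 *	{
 *		uint32_t sum = IPPROTO_TCP + (uint32_t)len;
 *
 *		sum = sum_be16(saddr, 4, sum);
 *		sum = sum_be16(daddr, 4, sum);
 *		sum = sum_be16(seg, len, sum);
 *		while (sum >> 16)
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return (uint16_t)~sum;
 *	}
 */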
549 
550 int tcp_v4_gso_send_check(struct sk_buff *skb)
551 {
552 	const struct iphdr *iph;
553 	struct tcphdr *th;
554 
555 	if (!pskb_may_pull(skb, sizeof(*th)))
556 		return -EINVAL;
557 
558 	iph = ip_hdr(skb);
559 	th = tcp_hdr(skb);
560 
561 	th->check = 0;
562 	skb->ip_summed = CHECKSUM_PARTIAL;
563 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
564 	return 0;
565 }
566 
567 /*
568  *	This routine will send an RST to the other tcp.
569  *
570  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
571  *		      for the reset?
572  *	Answer: if a packet caused an RST, it is not for a socket
573  *		existing in our system; if it is matched to a socket,
574  *		it is just a duplicate segment or a bug in the other side's TCP.
575  *		So we build the reply based only on the parameters
576  *		that arrived with the segment.
577  *	Exception: precedence violation. We do not implement it in any case.
578  */
579 
580 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
581 {
582 	struct tcphdr *th = tcp_hdr(skb);
583 	struct {
584 		struct tcphdr th;
585 #ifdef CONFIG_TCP_MD5SIG
586 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
587 #endif
588 	} rep;
589 	struct ip_reply_arg arg;
590 #ifdef CONFIG_TCP_MD5SIG
591 	struct tcp_md5sig_key *key;
592 #endif
593 	struct net *net;
594 
595 	/* Never send a reset in response to a reset. */
596 	if (th->rst)
597 		return;
598 
599 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
600 		return;
601 
602 	/* Swap the send and the receive. */
603 	memset(&rep, 0, sizeof(rep));
604 	rep.th.dest   = th->source;
605 	rep.th.source = th->dest;
606 	rep.th.doff   = sizeof(struct tcphdr) / 4;
607 	rep.th.rst    = 1;
608 
609 	if (th->ack) {
610 		rep.th.seq = th->ack_seq;
611 	} else {
612 		rep.th.ack = 1;
613 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
614 				       skb->len - (th->doff << 2));
615 	}
616 
617 	memset(&arg, 0, sizeof(arg));
618 	arg.iov[0].iov_base = (unsigned char *)&rep;
619 	arg.iov[0].iov_len  = sizeof(rep.th);
620 
621 #ifdef CONFIG_TCP_MD5SIG
622 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
623 	if (key) {
624 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
625 				   (TCPOPT_NOP << 16) |
626 				   (TCPOPT_MD5SIG << 8) |
627 				   TCPOLEN_MD5SIG);
628 		/* Update length and the length the header thinks exists */
629 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
630 		rep.th.doff = arg.iov[0].iov_len / 4;
631 
632 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
633 				     key, ip_hdr(skb)->saddr,
634 				     ip_hdr(skb)->daddr, &rep.th);
635 	}
636 #endif
637 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
638 				      ip_hdr(skb)->saddr, /* XXX */
639 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
640 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
641 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
642 
643 	net = dev_net(skb_dst(skb)->dev);
644 	ip_send_reply(net->ipv4.tcp_sock, skb,
645 		      &arg, arg.iov[0].iov_len);
646 
647 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
648 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
649 }
650 
651 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
652    outside of socket context, is certainly ugly. What can I do?
653  */
654 
655 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
656 			    u32 win, u32 ts, int oif,
657 			    struct tcp_md5sig_key *key,
658 			    int reply_flags)
659 {
660 	struct tcphdr *th = tcp_hdr(skb);
661 	struct {
662 		struct tcphdr th;
663 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
664 #ifdef CONFIG_TCP_MD5SIG
665 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
666 #endif
667 			];
668 	} rep;
669 	struct ip_reply_arg arg;
670 	struct net *net = dev_net(skb_dst(skb)->dev);
671 
672 	memset(&rep.th, 0, sizeof(struct tcphdr));
673 	memset(&arg, 0, sizeof(arg));
674 
675 	arg.iov[0].iov_base = (unsigned char *)&rep;
676 	arg.iov[0].iov_len  = sizeof(rep.th);
677 	if (ts) {
678 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
679 				   (TCPOPT_TIMESTAMP << 8) |
680 				   TCPOLEN_TIMESTAMP);
681 		rep.opt[1] = htonl(tcp_time_stamp);
682 		rep.opt[2] = htonl(ts);
683 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
684 	}
685 
686 	/* Swap the send and the receive. */
687 	rep.th.dest    = th->source;
688 	rep.th.source  = th->dest;
689 	rep.th.doff    = arg.iov[0].iov_len / 4;
690 	rep.th.seq     = htonl(seq);
691 	rep.th.ack_seq = htonl(ack);
692 	rep.th.ack     = 1;
693 	rep.th.window  = htons(win);
694 
695 #ifdef CONFIG_TCP_MD5SIG
696 	if (key) {
697 		int offset = (ts) ? 3 : 0;
698 
699 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
700 					  (TCPOPT_NOP << 16) |
701 					  (TCPOPT_MD5SIG << 8) |
702 					  TCPOLEN_MD5SIG);
703 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
704 		rep.th.doff = arg.iov[0].iov_len/4;
705 
706 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
707 				    key, ip_hdr(skb)->saddr,
708 				    ip_hdr(skb)->daddr, &rep.th);
709 	}
710 #endif
711 	arg.flags = reply_flags;
712 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
713 				      ip_hdr(skb)->saddr, /* XXX */
714 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
715 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
716 	if (oif)
717 		arg.bound_dev_if = oif;
718 
719 	ip_send_reply(net->ipv4.tcp_sock, skb,
720 		      &arg, arg.iov[0].iov_len);
721 
722 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
723 }
724 
725 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
726 {
727 	struct inet_timewait_sock *tw = inet_twsk(sk);
728 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
729 
730 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
731 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
732 			tcptw->tw_ts_recent,
733 			tw->tw_bound_dev_if,
734 			tcp_twsk_md5_key(tcptw),
735 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
736 			);
737 
738 	inet_twsk_put(tw);
739 }
740 
741 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
742 				  struct request_sock *req)
743 {
744 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
745 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
746 			req->ts_recent,
747 			0,
748 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
749 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
750 }
751 
752 /*
753  *	Send a SYN-ACK after having received a SYN.
754  *	This still operates on a request_sock only, not on a big
755  *	socket.
756  */
757 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
758 			      struct request_sock *req,
759 			      struct request_values *rvp)
760 {
761 	const struct inet_request_sock *ireq = inet_rsk(req);
762 	int err = -1;
763 	struct sk_buff * skb;
764 
765 	/* First, grab a route. */
766 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
767 		return -1;
768 
769 	skb = tcp_make_synack(sk, dst, req, rvp);
770 
771 	if (skb) {
772 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
773 
774 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
775 					    ireq->rmt_addr,
776 					    ireq->opt);
777 		err = net_xmit_eval(err);
778 	}
779 
780 	dst_release(dst);
781 	return err;
782 }
783 
784 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
785 			      struct request_values *rvp)
786 {
787 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
788 	return tcp_v4_send_synack(sk, NULL, req, rvp);
789 }
790 
791 /*
792  *	IPv4 request_sock destructor.
793  */
794 static void tcp_v4_reqsk_destructor(struct request_sock *req)
795 {
796 	kfree(inet_rsk(req)->opt);
797 }
798 
799 static void syn_flood_warning(const struct sk_buff *skb)
800 {
801 	const char *msg;
802 
803 #ifdef CONFIG_SYN_COOKIES
804 	if (sysctl_tcp_syncookies)
805 		msg = "Sending cookies";
806 	else
807 #endif
808 		msg = "Dropping request";
809 
810 	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
811 				ntohs(tcp_hdr(skb)->dest), msg);
812 }
813 
814 /*
815  * Save and compile IPv4 options into the request_sock if needed.
816  */
817 static struct ip_options *tcp_v4_save_options(struct sock *sk,
818 					      struct sk_buff *skb)
819 {
820 	struct ip_options *opt = &(IPCB(skb)->opt);
821 	struct ip_options *dopt = NULL;
822 
823 	if (opt && opt->optlen) {
824 		int opt_size = optlength(opt);
825 		dopt = kmalloc(opt_size, GFP_ATOMIC);
826 		if (dopt) {
827 			if (ip_options_echo(dopt, skb)) {
828 				kfree(dopt);
829 				dopt = NULL;
830 			}
831 		}
832 	}
833 	return dopt;
834 }
835 
836 #ifdef CONFIG_TCP_MD5SIG
837 /*
838  * RFC2385 MD5 checksumming requires a mapping of
839  * IP address->MD5 Key.
840  * We need to maintain these in the sk structure.
841  */
842 
843 /* Find the Key structure for an address.  */
844 static struct tcp_md5sig_key *
845 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
846 {
847 	struct tcp_sock *tp = tcp_sk(sk);
848 	int i;
849 
850 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
851 		return NULL;
852 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
853 		if (tp->md5sig_info->keys4[i].addr == addr)
854 			return &tp->md5sig_info->keys4[i].base;
855 	}
856 	return NULL;
857 }
858 
859 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
860 					 struct sock *addr_sk)
861 {
862 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
863 }
864 EXPORT_SYMBOL(tcp_v4_md5_lookup);
865 
866 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
867 						      struct request_sock *req)
868 {
869 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
870 }
871 
872 /* This can be called on a newly created socket, from other files */
873 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
874 		      u8 *newkey, u8 newkeylen)
875 {
876 	/* Add Key to the list */
877 	struct tcp_md5sig_key *key;
878 	struct tcp_sock *tp = tcp_sk(sk);
879 	struct tcp4_md5sig_key *keys;
880 
881 	key = tcp_v4_md5_do_lookup(sk, addr);
882 	if (key) {
883 		/* Pre-existing entry - just update that one. */
884 		kfree(key->key);
885 		key->key = newkey;
886 		key->keylen = newkeylen;
887 	} else {
888 		struct tcp_md5sig_info *md5sig;
889 
890 		if (!tp->md5sig_info) {
891 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
892 						  GFP_ATOMIC);
893 			if (!tp->md5sig_info) {
894 				kfree(newkey);
895 				return -ENOMEM;
896 			}
897 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
898 		}
899 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
900 			kfree(newkey);
901 			return -ENOMEM;
902 		}
903 		md5sig = tp->md5sig_info;
904 
905 		if (md5sig->alloced4 == md5sig->entries4) {
906 			keys = kmalloc((sizeof(*keys) *
907 					(md5sig->entries4 + 1)), GFP_ATOMIC);
908 			if (!keys) {
909 				kfree(newkey);
910 				tcp_free_md5sig_pool();
911 				return -ENOMEM;
912 			}
913 
914 			if (md5sig->entries4)
915 				memcpy(keys, md5sig->keys4,
916 				       sizeof(*keys) * md5sig->entries4);
917 
918 			/* Free old key list, and reference new one */
919 			kfree(md5sig->keys4);
920 			md5sig->keys4 = keys;
921 			md5sig->alloced4++;
922 		}
923 		md5sig->entries4++;
924 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
925 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
926 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
927 	}
928 	return 0;
929 }
930 EXPORT_SYMBOL(tcp_v4_md5_do_add);
931 
932 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
933 			       u8 *newkey, u8 newkeylen)
934 {
935 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
936 				 newkey, newkeylen);
937 }
938 
939 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
940 {
941 	struct tcp_sock *tp = tcp_sk(sk);
942 	int i;
943 
944 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
945 		if (tp->md5sig_info->keys4[i].addr == addr) {
946 			/* Free the key */
947 			kfree(tp->md5sig_info->keys4[i].base.key);
948 			tp->md5sig_info->entries4--;
949 
950 			if (tp->md5sig_info->entries4 == 0) {
951 				kfree(tp->md5sig_info->keys4);
952 				tp->md5sig_info->keys4 = NULL;
953 				tp->md5sig_info->alloced4 = 0;
954 			} else if (tp->md5sig_info->entries4 != i) {
955 				/* Shift the remaining entries down */
956 				memmove(&tp->md5sig_info->keys4[i],
957 					&tp->md5sig_info->keys4[i+1],
958 					(tp->md5sig_info->entries4 - i) *
959 					 sizeof(struct tcp4_md5sig_key));
960 			}
961 			tcp_free_md5sig_pool();
962 			return 0;
963 		}
964 	}
965 	return -ENOENT;
966 }
967 EXPORT_SYMBOL(tcp_v4_md5_do_del);
968 
969 static void tcp_v4_clear_md5_list(struct sock *sk)
970 {
971 	struct tcp_sock *tp = tcp_sk(sk);
972 
973 	/* Free each key, then the set of keys,
974 	 * the crypto element, and then decrement our
975 	 * hold on the last resort crypto.
976 	 */
977 	if (tp->md5sig_info->entries4) {
978 		int i;
979 		for (i = 0; i < tp->md5sig_info->entries4; i++)
980 			kfree(tp->md5sig_info->keys4[i].base.key);
981 		tp->md5sig_info->entries4 = 0;
982 		tcp_free_md5sig_pool();
983 	}
984 	if (tp->md5sig_info->keys4) {
985 		kfree(tp->md5sig_info->keys4);
986 		tp->md5sig_info->keys4 = NULL;
987 		tp->md5sig_info->alloced4  = 0;
988 	}
989 }
990 
991 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
992 				 int optlen)
993 {
994 	struct tcp_md5sig cmd;
995 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
996 	u8 *newkey;
997 
998 	if (optlen < sizeof(cmd))
999 		return -EINVAL;
1000 
1001 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1002 		return -EFAULT;
1003 
1004 	if (sin->sin_family != AF_INET)
1005 		return -EINVAL;
1006 
1007 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1008 		if (!tcp_sk(sk)->md5sig_info)
1009 			return -ENOENT;
1010 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1011 	}
1012 
1013 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1014 		return -EINVAL;
1015 
1016 	if (!tcp_sk(sk)->md5sig_info) {
1017 		struct tcp_sock *tp = tcp_sk(sk);
1018 		struct tcp_md5sig_info *p;
1019 
1020 		p = kzalloc(sizeof(*p), sk->sk_allocation);
1021 		if (!p)
1022 			return -EINVAL;
1023 
1024 		tp->md5sig_info = p;
1025 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1026 	}
1027 
1028 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1029 	if (!newkey)
1030 		return -ENOMEM;
1031 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1032 				 newkey, cmd.tcpm_keylen);
1033 }
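/*
 * Userspace installs these keys with the TCP_MD5SIG socket option, which is
 * what lands in tcp_v4_parse_md5_keys() above.  A minimal sketch, assuming
 * a libc that exposes struct tcp_md5sig and TCP_MD5SIG_MAXKEYLEN in
 * <netinet/tcp.h>:
 *
 *	#include <string.h>
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <sys/socket.h>
 *
 *	int set_tcp_md5_key(int fd, const struct sockaddr_in *peer,
 *			    const void *key, int keylen)
 *	{
 *		struct tcp_md5sig md5;
 *
 *		if (keylen > TCP_MD5SIG_MAXKEYLEN)
 *			return -1;
 *		memset(&md5, 0, sizeof(md5));
 *		memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
 *		md5.tcpm_keylen = keylen;
 *		memcpy(md5.tcpm_key, key, keylen);
 *		return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
 *				  &md5, sizeof(md5));
 *	}
 */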
1034 
1035 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1036 					__be32 daddr, __be32 saddr, int nbytes)
1037 {
1038 	struct tcp4_pseudohdr *bp;
1039 	struct scatterlist sg;
1040 
1041 	bp = &hp->md5_blk.ip4;
1042 
1043 	/*
1044 	 * 1. the TCP pseudo-header (in the order: source IP address,
1045 	 * destination IP address, zero-padded protocol number, and
1046 	 * segment length)
1047 	 */
1048 	bp->saddr = saddr;
1049 	bp->daddr = daddr;
1050 	bp->pad = 0;
1051 	bp->protocol = IPPROTO_TCP;
1052 	bp->len = cpu_to_be16(nbytes);
1053 
1054 	sg_init_one(&sg, bp, sizeof(*bp));
1055 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1056 }
1057 
1058 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1059 			       __be32 daddr, __be32 saddr, struct tcphdr *th)
1060 {
1061 	struct tcp_md5sig_pool *hp;
1062 	struct hash_desc *desc;
1063 
1064 	hp = tcp_get_md5sig_pool();
1065 	if (!hp)
1066 		goto clear_hash_noput;
1067 	desc = &hp->md5_desc;
1068 
1069 	if (crypto_hash_init(desc))
1070 		goto clear_hash;
1071 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1072 		goto clear_hash;
1073 	if (tcp_md5_hash_header(hp, th))
1074 		goto clear_hash;
1075 	if (tcp_md5_hash_key(hp, key))
1076 		goto clear_hash;
1077 	if (crypto_hash_final(desc, md5_hash))
1078 		goto clear_hash;
1079 
1080 	tcp_put_md5sig_pool();
1081 	return 0;
1082 
1083 clear_hash:
1084 	tcp_put_md5sig_pool();
1085 clear_hash_noput:
1086 	memset(md5_hash, 0, 16);
1087 	return 1;
1088 }
1089 
1090 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1091 			struct sock *sk, struct request_sock *req,
1092 			struct sk_buff *skb)
1093 {
1094 	struct tcp_md5sig_pool *hp;
1095 	struct hash_desc *desc;
1096 	struct tcphdr *th = tcp_hdr(skb);
1097 	__be32 saddr, daddr;
1098 
1099 	if (sk) {
1100 		saddr = inet_sk(sk)->inet_saddr;
1101 		daddr = inet_sk(sk)->inet_daddr;
1102 	} else if (req) {
1103 		saddr = inet_rsk(req)->loc_addr;
1104 		daddr = inet_rsk(req)->rmt_addr;
1105 	} else {
1106 		const struct iphdr *iph = ip_hdr(skb);
1107 		saddr = iph->saddr;
1108 		daddr = iph->daddr;
1109 	}
1110 
1111 	hp = tcp_get_md5sig_pool();
1112 	if (!hp)
1113 		goto clear_hash_noput;
1114 	desc = &hp->md5_desc;
1115 
1116 	if (crypto_hash_init(desc))
1117 		goto clear_hash;
1118 
1119 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1120 		goto clear_hash;
1121 	if (tcp_md5_hash_header(hp, th))
1122 		goto clear_hash;
1123 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1124 		goto clear_hash;
1125 	if (tcp_md5_hash_key(hp, key))
1126 		goto clear_hash;
1127 	if (crypto_hash_final(desc, md5_hash))
1128 		goto clear_hash;
1129 
1130 	tcp_put_md5sig_pool();
1131 	return 0;
1132 
1133 clear_hash:
1134 	tcp_put_md5sig_pool();
1135 clear_hash_noput:
1136 	memset(md5_hash, 0, 16);
1137 	return 1;
1138 }
1139 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1140 
1141 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1142 {
1143 	/*
1144 	 * This gets called for each TCP segment that arrives
1145 	 * so we want to be efficient.
1146 	 * We have 3 drop cases:
1147 	 * o No MD5 hash and one expected.
1148 	 * o MD5 hash and we're not expecting one.
1149 	 * o MD5 hash and it's wrong.
1150 	 */
1151 	__u8 *hash_location = NULL;
1152 	struct tcp_md5sig_key *hash_expected;
1153 	const struct iphdr *iph = ip_hdr(skb);
1154 	struct tcphdr *th = tcp_hdr(skb);
1155 	int genhash;
1156 	unsigned char newhash[16];
1157 
1158 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1159 	hash_location = tcp_parse_md5sig_option(th);
1160 
1161 	/* We've parsed the options - do we have a hash? */
1162 	if (!hash_expected && !hash_location)
1163 		return 0;
1164 
1165 	if (hash_expected && !hash_location) {
1166 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1167 		return 1;
1168 	}
1169 
1170 	if (!hash_expected && hash_location) {
1171 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1172 		return 1;
1173 	}
1174 
1175 	/* Okay, so this is hash_expected and hash_location -
1176 	 * so we need to calculate the checksum.
1177 	 */
1178 	genhash = tcp_v4_md5_hash_skb(newhash,
1179 				      hash_expected,
1180 				      NULL, NULL, skb);
1181 
1182 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1183 		if (net_ratelimit()) {
1184 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1185 			       &iph->saddr, ntohs(th->source),
1186 			       &iph->daddr, ntohs(th->dest),
1187 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1188 		}
1189 		return 1;
1190 	}
1191 	return 0;
1192 }
1193 
1194 #endif
1195 
1196 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1197 	.family		=	PF_INET,
1198 	.obj_size	=	sizeof(struct tcp_request_sock),
1199 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1200 	.send_ack	=	tcp_v4_reqsk_send_ack,
1201 	.destructor	=	tcp_v4_reqsk_destructor,
1202 	.send_reset	=	tcp_v4_send_reset,
1203 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1204 };
1205 
1206 #ifdef CONFIG_TCP_MD5SIG
1207 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1208 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1209 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1210 };
1211 #endif
1212 
1213 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1214 {
1215 	struct tcp_extend_values tmp_ext;
1216 	struct tcp_options_received tmp_opt;
1217 	u8 *hash_location;
1218 	struct request_sock *req;
1219 	struct inet_request_sock *ireq;
1220 	struct tcp_sock *tp = tcp_sk(sk);
1221 	struct dst_entry *dst = NULL;
1222 	__be32 saddr = ip_hdr(skb)->saddr;
1223 	__be32 daddr = ip_hdr(skb)->daddr;
1224 	__u32 isn = TCP_SKB_CB(skb)->when;
1225 #ifdef CONFIG_SYN_COOKIES
1226 	int want_cookie = 0;
1227 #else
1228 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1229 #endif
1230 
1231 	/* Never answer SYNs sent to broadcast or multicast */
1232 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1233 		goto drop;
1234 
1235 	/* TW buckets are converted to open requests without
1236 	 * limitations: they conserve resources and the peer is
1237 	 * evidently a real one.
1238 	 */
1239 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1240 		if (net_ratelimit())
1241 			syn_flood_warning(skb);
1242 #ifdef CONFIG_SYN_COOKIES
1243 		if (sysctl_tcp_syncookies) {
1244 			want_cookie = 1;
1245 		} else
1246 #endif
1247 		goto drop;
1248 	}
1249 
1250 	/* Accept backlog is full. If we have already queued enough
1251 	 * warm entries in the syn queue, drop the request. It is better than
1252 	 * clogging syn queue with openreqs with exponentially increasing
1253 	 * timeout.
1254 	 */
1255 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1256 		goto drop;
1257 
1258 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1259 	if (!req)
1260 		goto drop;
1261 
1262 #ifdef CONFIG_TCP_MD5SIG
1263 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1264 #endif
1265 
1266 	tcp_clear_options(&tmp_opt);
1267 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1268 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1269 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1270 
1271 	if (tmp_opt.cookie_plus > 0 &&
1272 	    tmp_opt.saw_tstamp &&
1273 	    !tp->rx_opt.cookie_out_never &&
1274 	    (sysctl_tcp_cookie_size > 0 ||
1275 	     (tp->cookie_values != NULL &&
1276 	      tp->cookie_values->cookie_desired > 0))) {
1277 		u8 *c;
1278 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1279 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1280 
1281 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1282 			goto drop_and_release;
1283 
1284 		/* Secret recipe starts with IP addresses */
1285 		*mess++ ^= (__force u32)daddr;
1286 		*mess++ ^= (__force u32)saddr;
1287 
1288 		/* plus variable length Initiator Cookie */
1289 		c = (u8 *)mess;
1290 		while (l-- > 0)
1291 			*c++ ^= *hash_location++;
1292 
1293 #ifdef CONFIG_SYN_COOKIES
1294 		want_cookie = 0;	/* not our kind of cookie */
1295 #endif
1296 		tmp_ext.cookie_out_never = 0; /* false */
1297 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1298 	} else if (!tp->rx_opt.cookie_in_always) {
1299 		/* redundant indications, but ensure initialization. */
1300 		tmp_ext.cookie_out_never = 1; /* true */
1301 		tmp_ext.cookie_plus = 0;
1302 	} else {
1303 		goto drop_and_release;
1304 	}
1305 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1306 
1307 	if (want_cookie && !tmp_opt.saw_tstamp)
1308 		tcp_clear_options(&tmp_opt);
1309 
1310 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1311 	tcp_openreq_init(req, &tmp_opt, skb);
1312 
1313 	ireq = inet_rsk(req);
1314 	ireq->loc_addr = daddr;
1315 	ireq->rmt_addr = saddr;
1316 	ireq->no_srccheck = inet_sk(sk)->transparent;
1317 	ireq->opt = tcp_v4_save_options(sk, skb);
1318 
1319 	if (security_inet_conn_request(sk, skb, req))
1320 		goto drop_and_free;
1321 
1322 	if (!want_cookie || tmp_opt.tstamp_ok)
1323 		TCP_ECN_create_request(req, tcp_hdr(skb));
1324 
1325 	if (want_cookie) {
1326 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1327 		req->cookie_ts = tmp_opt.tstamp_ok;
1328 	} else if (!isn) {
1329 		struct inet_peer *peer = NULL;
1330 
1331 		/* VJ's idea. We save the last timestamp seen
1332 		 * from the destination in the peer table, when entering
1333 		 * TIME-WAIT state, and check against it before
1334 		 * accepting a new connection request.
1335 		 *
1336 		 * If "isn" is not zero, this request hit an alive
1337 		 * timewait bucket, so all the necessary checks
1338 		 * are made in the function processing timewait state.
1339 		 */
1340 		if (tmp_opt.saw_tstamp &&
1341 		    tcp_death_row.sysctl_tw_recycle &&
1342 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1343 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1344 		    peer->daddr.a4 == saddr) {
1345 			inet_peer_refcheck(peer);
1346 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1347 			    (s32)(peer->tcp_ts - req->ts_recent) >
1348 							TCP_PAWS_WINDOW) {
1349 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1350 				goto drop_and_release;
1351 			}
1352 		}
1353 		/* Kill the following clause, if you dislike this way. */
1354 		else if (!sysctl_tcp_syncookies &&
1355 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1356 			  (sysctl_max_syn_backlog >> 2)) &&
1357 			 (!peer || !peer->tcp_ts_stamp) &&
1358 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1359 			/* Without syncookies, the last quarter of the
1360 			 * backlog is reserved for destinations
1361 			 * proven to be alive.
1362 			 * It means that we keep communicating only
1363 			 * with destinations already known
1364 			 * at the moment the synflood started.
1365 			 */
1366 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1367 				       &saddr, ntohs(tcp_hdr(skb)->source));
1368 			goto drop_and_release;
1369 		}
1370 
1371 		isn = tcp_v4_init_sequence(skb);
1372 	}
1373 	tcp_rsk(req)->snt_isn = isn;
1374 
1375 	if (tcp_v4_send_synack(sk, dst, req,
1376 			       (struct request_values *)&tmp_ext) ||
1377 	    want_cookie)
1378 		goto drop_and_free;
1379 
1380 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1381 	return 0;
1382 
1383 drop_and_release:
1384 	dst_release(dst);
1385 drop_and_free:
1386 	reqsk_free(req);
1387 drop:
1388 	return 0;
1389 }
1390 EXPORT_SYMBOL(tcp_v4_conn_request);
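/*
 * Two userspace-visible knobs govern the SYN-flood handling above: the
 * tcp_syncookies sysctl and the backlog passed to listen(2).  A minimal
 * sketch (procfs path assumed, backlog value arbitrary; illustrative only):
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	int harden_listener(int listen_fd)
 *	{
 *		FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");
 *
 *		if (f) {
 *			fputs("1", f);
 *			fclose(f);
 *		}
 *		return listen(listen_fd, 128);
 *	}
 */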
1391 
1392 
1393 /*
1394  * The three way handshake has completed - we got a valid synack -
1395  * now create the new socket.
1396  */
1397 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1398 				  struct request_sock *req,
1399 				  struct dst_entry *dst)
1400 {
1401 	struct inet_request_sock *ireq;
1402 	struct inet_sock *newinet;
1403 	struct tcp_sock *newtp;
1404 	struct sock *newsk;
1405 #ifdef CONFIG_TCP_MD5SIG
1406 	struct tcp_md5sig_key *key;
1407 #endif
1408 
1409 	if (sk_acceptq_is_full(sk))
1410 		goto exit_overflow;
1411 
1412 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1413 		goto exit;
1414 
1415 	newsk = tcp_create_openreq_child(sk, req, skb);
1416 	if (!newsk)
1417 		goto exit_nonewsk;
1418 
1419 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1420 	sk_setup_caps(newsk, dst);
1421 
1422 	newtp		      = tcp_sk(newsk);
1423 	newinet		      = inet_sk(newsk);
1424 	ireq		      = inet_rsk(req);
1425 	newinet->inet_daddr   = ireq->rmt_addr;
1426 	newinet->inet_rcv_saddr = ireq->loc_addr;
1427 	newinet->inet_saddr	      = ireq->loc_addr;
1428 	newinet->opt	      = ireq->opt;
1429 	ireq->opt	      = NULL;
1430 	newinet->mc_index     = inet_iif(skb);
1431 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1432 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1433 	if (newinet->opt)
1434 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1435 	newinet->inet_id = newtp->write_seq ^ jiffies;
1436 
1437 	tcp_mtup_init(newsk);
1438 	tcp_sync_mss(newsk, dst_mtu(dst));
1439 	newtp->advmss = dst_metric_advmss(dst);
1440 	if (tcp_sk(sk)->rx_opt.user_mss &&
1441 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1442 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1443 
1444 	tcp_initialize_rcv_mss(newsk);
1445 
1446 #ifdef CONFIG_TCP_MD5SIG
1447 	/* Copy over the MD5 key from the original socket */
1448 	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1449 	if (key != NULL) {
1450 		/*
1451 		 * We're using one, so create a matching key
1452 		 * on the newsk structure. If we fail to get
1453 		 * memory, then we end up not copying the key
1454 		 * across. Shucks.
1455 		 */
1456 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1457 		if (newkey != NULL)
1458 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1459 					  newkey, key->keylen);
1460 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1461 	}
1462 #endif
1463 
1464 	if (__inet_inherit_port(sk, newsk) < 0) {
1465 		sock_put(newsk);
1466 		goto exit;
1467 	}
1468 	__inet_hash_nolisten(newsk, NULL);
1469 
1470 	return newsk;
1471 
1472 exit_overflow:
1473 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1474 exit_nonewsk:
1475 	dst_release(dst);
1476 exit:
1477 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1478 	return NULL;
1479 }
1480 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1481 
1482 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483 {
1484 	struct tcphdr *th = tcp_hdr(skb);
1485 	const struct iphdr *iph = ip_hdr(skb);
1486 	struct sock *nsk;
1487 	struct request_sock **prev;
1488 	/* Find possible connection requests. */
1489 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1490 						       iph->saddr, iph->daddr);
1491 	if (req)
1492 		return tcp_check_req(sk, skb, req, prev);
1493 
1494 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1495 			th->source, iph->daddr, th->dest, inet_iif(skb));
1496 
1497 	if (nsk) {
1498 		if (nsk->sk_state != TCP_TIME_WAIT) {
1499 			bh_lock_sock(nsk);
1500 			return nsk;
1501 		}
1502 		inet_twsk_put(inet_twsk(nsk));
1503 		return NULL;
1504 	}
1505 
1506 #ifdef CONFIG_SYN_COOKIES
1507 	if (!th->syn)
1508 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1509 #endif
1510 	return sk;
1511 }
1512 
1513 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1514 {
1515 	const struct iphdr *iph = ip_hdr(skb);
1516 
1517 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1518 		if (!tcp_v4_check(skb->len, iph->saddr,
1519 				  iph->daddr, skb->csum)) {
1520 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1521 			return 0;
1522 		}
1523 	}
1524 
1525 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1526 				       skb->len, IPPROTO_TCP, 0);
1527 
1528 	if (skb->len <= 76) {
1529 		return __skb_checksum_complete(skb);
1530 	}
1531 	return 0;
1532 }
1533 
1534 
1535 /* The socket must have its spinlock held when we get
1536  * here.
1537  *
1538  * We have a potential double-lock case here, so even when
1539  * doing backlog processing we use the BH locking scheme.
1540  * This is because we cannot sleep with the original spinlock
1541  * held.
1542  */
1543 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1544 {
1545 	struct sock *rsk;
1546 #ifdef CONFIG_TCP_MD5SIG
1547 	/*
1548 	 * We really want to reject the packet as early as possible
1549 	 * if:
1550 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1551 	 *  o There is an MD5 option and we're not expecting one
1552 	 */
1553 	if (tcp_v4_inbound_md5_hash(sk, skb))
1554 		goto discard;
1555 #endif
1556 
1557 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1558 		sock_rps_save_rxhash(sk, skb->rxhash);
1559 		TCP_CHECK_TIMER(sk);
1560 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1561 			rsk = sk;
1562 			goto reset;
1563 		}
1564 		TCP_CHECK_TIMER(sk);
1565 		return 0;
1566 	}
1567 
1568 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1569 		goto csum_err;
1570 
1571 	if (sk->sk_state == TCP_LISTEN) {
1572 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1573 		if (!nsk)
1574 			goto discard;
1575 
1576 		if (nsk != sk) {
1577 			if (tcp_child_process(sk, nsk, skb)) {
1578 				rsk = nsk;
1579 				goto reset;
1580 			}
1581 			return 0;
1582 		}
1583 	} else
1584 		sock_rps_save_rxhash(sk, skb->rxhash);
1585 
1586 
1587 	TCP_CHECK_TIMER(sk);
1588 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1589 		rsk = sk;
1590 		goto reset;
1591 	}
1592 	TCP_CHECK_TIMER(sk);
1593 	return 0;
1594 
1595 reset:
1596 	tcp_v4_send_reset(rsk, skb);
1597 discard:
1598 	kfree_skb(skb);
1599 	/* Be careful here. If this function gets more complicated and
1600 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1601 	 * might be destroyed here. This current version compiles correctly,
1602 	 * but you have been warned.
1603 	 */
1604 	return 0;
1605 
1606 csum_err:
1607 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1608 	goto discard;
1609 }
1610 EXPORT_SYMBOL(tcp_v4_do_rcv);
1611 
1612 /*
1613  *	From tcp_input.c
1614  */
1615 
1616 int tcp_v4_rcv(struct sk_buff *skb)
1617 {
1618 	const struct iphdr *iph;
1619 	struct tcphdr *th;
1620 	struct sock *sk;
1621 	int ret;
1622 	struct net *net = dev_net(skb->dev);
1623 
1624 	if (skb->pkt_type != PACKET_HOST)
1625 		goto discard_it;
1626 
1627 	/* Count it even if it's bad */
1628 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1629 
1630 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1631 		goto discard_it;
1632 
1633 	th = tcp_hdr(skb);
1634 
1635 	if (th->doff < sizeof(struct tcphdr) / 4)
1636 		goto bad_packet;
1637 	if (!pskb_may_pull(skb, th->doff * 4))
1638 		goto discard_it;
1639 
1640 	/* An explanation is required here, I think.
1641 	 * Packet length and doff are validated by header prediction,
1642 	 * provided case of th->doff==0 is eliminated.
1643 	 * So, we defer the checks. */
1644 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1645 		goto bad_packet;
1646 
1647 	th = tcp_hdr(skb);
1648 	iph = ip_hdr(skb);
1649 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1650 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1651 				    skb->len - th->doff * 4);
1652 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1653 	TCP_SKB_CB(skb)->when	 = 0;
1654 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1655 	TCP_SKB_CB(skb)->sacked	 = 0;
1656 
1657 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1658 	if (!sk)
1659 		goto no_tcp_socket;
1660 
1661 process:
1662 	if (sk->sk_state == TCP_TIME_WAIT)
1663 		goto do_time_wait;
1664 
1665 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1666 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1667 		goto discard_and_relse;
1668 	}
1669 
1670 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1671 		goto discard_and_relse;
1672 	nf_reset(skb);
1673 
1674 	if (sk_filter(sk, skb))
1675 		goto discard_and_relse;
1676 
1677 	skb->dev = NULL;
1678 
1679 	bh_lock_sock_nested(sk);
1680 	ret = 0;
1681 	if (!sock_owned_by_user(sk)) {
1682 #ifdef CONFIG_NET_DMA
1683 		struct tcp_sock *tp = tcp_sk(sk);
1684 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1685 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1686 		if (tp->ucopy.dma_chan)
1687 			ret = tcp_v4_do_rcv(sk, skb);
1688 		else
1689 #endif
1690 		{
1691 			if (!tcp_prequeue(sk, skb))
1692 				ret = tcp_v4_do_rcv(sk, skb);
1693 		}
1694 	} else if (unlikely(sk_add_backlog(sk, skb))) {
1695 		bh_unlock_sock(sk);
1696 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1697 		goto discard_and_relse;
1698 	}
1699 	bh_unlock_sock(sk);
1700 
1701 	sock_put(sk);
1702 
1703 	return ret;
1704 
1705 no_tcp_socket:
1706 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1707 		goto discard_it;
1708 
1709 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1710 bad_packet:
1711 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1712 	} else {
1713 		tcp_v4_send_reset(NULL, skb);
1714 	}
1715 
1716 discard_it:
1717 	/* Discard frame. */
1718 	kfree_skb(skb);
1719 	return 0;
1720 
1721 discard_and_relse:
1722 	sock_put(sk);
1723 	goto discard_it;
1724 
1725 do_time_wait:
1726 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1727 		inet_twsk_put(inet_twsk(sk));
1728 		goto discard_it;
1729 	}
1730 
1731 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1732 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1733 		inet_twsk_put(inet_twsk(sk));
1734 		goto discard_it;
1735 	}
1736 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1737 	case TCP_TW_SYN: {
1738 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1739 							&tcp_hashinfo,
1740 							iph->daddr, th->dest,
1741 							inet_iif(skb));
1742 		if (sk2) {
1743 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1744 			inet_twsk_put(inet_twsk(sk));
1745 			sk = sk2;
1746 			goto process;
1747 		}
1748 		/* Fall through to ACK */
1749 	}
1750 	case TCP_TW_ACK:
1751 		tcp_v4_timewait_ack(sk, skb);
1752 		break;
1753 	case TCP_TW_RST:
1754 		goto no_tcp_socket;
1755 	case TCP_TW_SUCCESS:;
1756 	}
1757 	goto discard_it;
1758 }
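/*
 * The min_ttl checks in the receive paths above implement the generalized
 * TTL security mechanism (RFC 5082); applications opt in per socket with
 * IP_MINTTL.  A minimal sketch, assuming a libc that defines IP_MINTTL
 * (a value of 255 only accepts directly connected peers):
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int require_min_ttl(int fd, int min_ttl)
 *	{
 *		return setsockopt(fd, IPPROTO_IP, IP_MINTTL,
 *				  &min_ttl, sizeof(min_ttl));
 *	}
 */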
1759 
1760 struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1761 {
1762 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1763 	struct inet_sock *inet = inet_sk(sk);
1764 	struct inet_peer *peer;
1765 
1766 	if (!rt || rt->rt_dst != inet->inet_daddr) {
1767 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
1768 		*release_it = true;
1769 	} else {
1770 		if (!rt->peer)
1771 			rt_bind_peer(rt, 1);
1772 		peer = rt->peer;
1773 		*release_it = false;
1774 	}
1775 
1776 	return peer;
1777 }
1778 EXPORT_SYMBOL(tcp_v4_get_peer);
1779 
1780 void *tcp_v4_tw_get_peer(struct sock *sk)
1781 {
1782 	struct inet_timewait_sock *tw = inet_twsk(sk);
1783 
1784 	return inet_getpeer_v4(tw->tw_daddr, 1);
1785 }
1786 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1787 
1788 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1789 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1790 	.twsk_unique	= tcp_twsk_unique,
1791 	.twsk_destructor= tcp_twsk_destructor,
1792 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1793 };
1794 
1795 const struct inet_connection_sock_af_ops ipv4_specific = {
1796 	.queue_xmit	   = ip_queue_xmit,
1797 	.send_check	   = tcp_v4_send_check,
1798 	.rebuild_header	   = inet_sk_rebuild_header,
1799 	.conn_request	   = tcp_v4_conn_request,
1800 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1801 	.get_peer	   = tcp_v4_get_peer,
1802 	.net_header_len	   = sizeof(struct iphdr),
1803 	.setsockopt	   = ip_setsockopt,
1804 	.getsockopt	   = ip_getsockopt,
1805 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1806 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1807 	.bind_conflict	   = inet_csk_bind_conflict,
1808 #ifdef CONFIG_COMPAT
1809 	.compat_setsockopt = compat_ip_setsockopt,
1810 	.compat_getsockopt = compat_ip_getsockopt,
1811 #endif
1812 };
1813 EXPORT_SYMBOL(ipv4_specific);
1814 
1815 #ifdef CONFIG_TCP_MD5SIG
1816 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1817 	.md5_lookup		= tcp_v4_md5_lookup,
1818 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1819 	.md5_add		= tcp_v4_md5_add_func,
1820 	.md5_parse		= tcp_v4_parse_md5_keys,
1821 };
1822 #endif
1823 
1824 /* NOTE: A lot of things set to zero explicitly by call to
1825  *       sk_alloc() so need not be done here.
1826  */
1827 static int tcp_v4_init_sock(struct sock *sk)
1828 {
1829 	struct inet_connection_sock *icsk = inet_csk(sk);
1830 	struct tcp_sock *tp = tcp_sk(sk);
1831 
1832 	skb_queue_head_init(&tp->out_of_order_queue);
1833 	tcp_init_xmit_timers(sk);
1834 	tcp_prequeue_init(tp);
1835 
1836 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1837 	tp->mdev = TCP_TIMEOUT_INIT;
1838 
1839 	/* So many TCP implementations out there (incorrectly) count the
1840 	 * initial SYN frame in their delayed-ACK and congestion control
1841 	 * algorithms that we must have the following bandaid to talk
1842 	 * efficiently to them.  -DaveM
1843 	 */
1844 	tp->snd_cwnd = 2;
1845 
1846 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1847 	 * initialization of these values.
1848 	 */
1849 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1850 	tp->snd_cwnd_clamp = ~0;
1851 	tp->mss_cache = TCP_MSS_DEFAULT;
1852 
1853 	tp->reordering = sysctl_tcp_reordering;
1854 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1855 
1856 	sk->sk_state = TCP_CLOSE;
1857 
1858 	sk->sk_write_space = sk_stream_write_space;
1859 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1860 
1861 	icsk->icsk_af_ops = &ipv4_specific;
1862 	icsk->icsk_sync_mss = tcp_sync_mss;
1863 #ifdef CONFIG_TCP_MD5SIG
1864 	tp->af_specific = &tcp_sock_ipv4_specific;
1865 #endif
1866 
1867 	/* TCP Cookie Transactions */
1868 	if (sysctl_tcp_cookie_size > 0) {
1869 		/* Default, cookies without s_data_payload. */
1870 		tp->cookie_values =
1871 			kzalloc(sizeof(*tp->cookie_values),
1872 				sk->sk_allocation);
1873 		if (tp->cookie_values != NULL)
1874 			kref_init(&tp->cookie_values->kref);
1875 	}
1876 	/* Presumed zeroed, in order of appearance:
1877 	 *	cookie_in_always, cookie_out_never,
1878 	 *	s_data_constant, s_data_in, s_data_out
1879 	 */
1880 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1881 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1882 
1883 	local_bh_disable();
1884 	percpu_counter_inc(&tcp_sockets_allocated);
1885 	local_bh_enable();
1886 
1887 	return 0;
1888 }
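
/*
 * Userspace-side illustration (not kernel code; the function name is
 * hypothetical): the init path above runs when an AF_INET stream socket
 * is created.  The socket layer reaches it through sk->sk_prot->init(),
 * and tcp_prot.init below points at tcp_v4_init_sock.
 */
#include <sys/socket.h>
#include <netinet/in.h>

static int example_open_tcp_socket(void)
{
	/* ends up invoking tcp_v4_init_sock() inside the kernel */
	return socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
}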
1889 
1890 void tcp_v4_destroy_sock(struct sock *sk)
1891 {
1892 	struct tcp_sock *tp = tcp_sk(sk);
1893 
1894 	tcp_clear_xmit_timers(sk);
1895 
1896 	tcp_cleanup_congestion_control(sk);
1897 
1898 	/* Clean up the write buffer. */
1899 	tcp_write_queue_purge(sk);
1900 
1901 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1902 	__skb_queue_purge(&tp->out_of_order_queue);
1903 
1904 #ifdef CONFIG_TCP_MD5SIG
1905 	/* Clean up the MD5 key list, if any */
1906 	if (tp->md5sig_info) {
1907 		tcp_v4_clear_md5_list(sk);
1908 		kfree(tp->md5sig_info);
1909 		tp->md5sig_info = NULL;
1910 	}
1911 #endif
1912 
1913 #ifdef CONFIG_NET_DMA
1914 	/* Cleans up our sk_async_wait_queue */
1915 	__skb_queue_purge(&sk->sk_async_wait_queue);
1916 #endif
1917 
1918 	/* Clean up the prequeue; it should already be empty. */
1919 	__skb_queue_purge(&tp->ucopy.prequeue);
1920 
1921 	/* Clean up a referenced TCP bind bucket. */
1922 	if (inet_csk(sk)->icsk_bind_hash)
1923 		inet_put_port(sk);
1924 
1925 	/*
1926 	 * If sendmsg cached page exists, toss it.
1927 	 */
1928 	if (sk->sk_sndmsg_page) {
1929 		__free_page(sk->sk_sndmsg_page);
1930 		sk->sk_sndmsg_page = NULL;
1931 	}
1932 
1933 	/* TCP Cookie Transactions */
1934 	if (tp->cookie_values != NULL) {
1935 		kref_put(&tp->cookie_values->kref,
1936 			 tcp_cookie_values_release);
1937 		tp->cookie_values = NULL;
1938 	}
1939 
1940 	percpu_counter_dec(&tcp_sockets_allocated);
1941 }
1942 EXPORT_SYMBOL(tcp_v4_destroy_sock);
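
/*
 * Note (added for clarity): this is the .destroy hook of tcp_prot below.
 * The socket layer invokes it through sk->sk_prot->destroy() while the
 * socket is being torn down (e.g. from inet_csk_destroy_sock()), after
 * the connection itself has already been closed.
 */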
1943 
1944 #ifdef CONFIG_PROC_FS
1945 /* Proc filesystem TCP sock list dumping. */
1946 
1947 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1948 {
1949 	return hlist_nulls_empty(head) ? NULL :
1950 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1951 }
1952 
1953 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1954 {
1955 	return !is_a_nulls(tw->tw_node.next) ?
1956 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1957 }
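
/*
 * Walk sketch (illustration only; the function name is hypothetical):
 * how the two helpers above traverse a TIME_WAIT chain.  The caller is
 * assumed to hold the ehash bucket lock, as established_get_first() and
 * established_get_next() below do.
 */
static void example_walk_twchain(struct hlist_nulls_head *twchain)
{
	struct inet_timewait_sock *tw;

	for (tw = tw_head(twchain); tw; tw = tw_next(tw)) {
		/* inspect tw->tw_daddr, tw->tw_dport, ... */
	}
}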
1958 
1959 /*
1960  * Get the next listener socket following cur.  If cur is NULL, get the
1961  * first socket starting from the bucket given in st->bucket; when
1962  * st->bucket is zero, the very first socket in the hash table is returned.
1963  */
1964 static void *listening_get_next(struct seq_file *seq, void *cur)
1965 {
1966 	struct inet_connection_sock *icsk;
1967 	struct hlist_nulls_node *node;
1968 	struct sock *sk = cur;
1969 	struct inet_listen_hashbucket *ilb;
1970 	struct tcp_iter_state *st = seq->private;
1971 	struct net *net = seq_file_net(seq);
1972 
1973 	if (!sk) {
1974 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1975 		spin_lock_bh(&ilb->lock);
1976 		sk = sk_nulls_head(&ilb->head);
1977 		st->offset = 0;
1978 		goto get_sk;
1979 	}
1980 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1981 	++st->num;
1982 	++st->offset;
1983 
1984 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1985 		struct request_sock *req = cur;
1986 
1987 		icsk = inet_csk(st->syn_wait_sk);
1988 		req = req->dl_next;
1989 		while (1) {
1990 			while (req) {
1991 				if (req->rsk_ops->family == st->family) {
1992 					cur = req;
1993 					goto out;
1994 				}
1995 				req = req->dl_next;
1996 			}
1997 			st->offset = 0;
1998 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1999 				break;
2000 get_req:
2001 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2002 		}
2003 		sk	  = sk_nulls_next(st->syn_wait_sk);
2004 		st->state = TCP_SEQ_STATE_LISTENING;
2005 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2006 	} else {
2007 		icsk = inet_csk(sk);
2008 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2009 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2010 			goto start_req;
2011 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2012 		sk = sk_nulls_next(sk);
2013 	}
2014 get_sk:
2015 	sk_nulls_for_each_from(sk, node) {
2016 		if (!net_eq(sock_net(sk), net))
2017 			continue;
2018 		if (sk->sk_family == st->family) {
2019 			cur = sk;
2020 			goto out;
2021 		}
2022 		icsk = inet_csk(sk);
2023 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2024 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2025 start_req:
2026 			st->uid		= sock_i_uid(sk);
2027 			st->syn_wait_sk = sk;
2028 			st->state	= TCP_SEQ_STATE_OPENREQ;
2029 			st->sbucket	= 0;
2030 			goto get_req;
2031 		}
2032 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2033 	}
2034 	spin_unlock_bh(&ilb->lock);
2035 	st->offset = 0;
2036 	if (++st->bucket < INET_LHTABLE_SIZE) {
2037 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2038 		spin_lock_bh(&ilb->lock);
2039 		sk = sk_nulls_head(&ilb->head);
2040 		goto get_sk;
2041 	}
2042 	cur = NULL;
2043 out:
2044 	return cur;
2045 }
2046 
2047 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2048 {
2049 	struct tcp_iter_state *st = seq->private;
2050 	void *rc;
2051 
2052 	st->bucket = 0;
2053 	st->offset = 0;
2054 	rc = listening_get_next(seq, NULL);
2055 
2056 	while (rc && *pos) {
2057 		rc = listening_get_next(seq, rc);
2058 		--*pos;
2059 	}
2060 	return rc;
2061 }
2062 
2063 static inline int empty_bucket(struct tcp_iter_state *st)
2064 {
2065 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2066 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2067 }
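
/*
 * empty_bucket() is deliberately lockless: it is only a fast path for
 * skipping buckets in the /proc listing, where racing with a concurrent
 * insertion is harmless.  A locked variant would look like this sketch
 * (illustration only; the name is hypothetical):
 */
static int example_bucket_empty_locked(unsigned int bucket)
{
	spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
	int empty;

	spin_lock_bh(lock);
	empty = hlist_nulls_empty(&tcp_hashinfo.ehash[bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[bucket].twchain);
	spin_unlock_bh(lock);
	return empty;
}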
2068 
2069 /*
2070  * Get first established socket starting from bucket given in st->bucket.
2071  * If st->bucket is zero, the very first socket in the hash is returned.
2072  */
2073 static void *established_get_first(struct seq_file *seq)
2074 {
2075 	struct tcp_iter_state *st = seq->private;
2076 	struct net *net = seq_file_net(seq);
2077 	void *rc = NULL;
2078 
2079 	st->offset = 0;
2080 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2081 		struct sock *sk;
2082 		struct hlist_nulls_node *node;
2083 		struct inet_timewait_sock *tw;
2084 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2085 
2086 		/* Lockless fast path for the common case of empty buckets */
2087 		if (empty_bucket(st))
2088 			continue;
2089 
2090 		spin_lock_bh(lock);
2091 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2092 			if (sk->sk_family != st->family ||
2093 			    !net_eq(sock_net(sk), net)) {
2094 				continue;
2095 			}
2096 			rc = sk;
2097 			goto out;
2098 		}
2099 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2100 		inet_twsk_for_each(tw, node,
2101 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2102 			if (tw->tw_family != st->family ||
2103 			    !net_eq(twsk_net(tw), net)) {
2104 				continue;
2105 			}
2106 			rc = tw;
2107 			goto out;
2108 		}
2109 		spin_unlock_bh(lock);
2110 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2111 	}
2112 out:
2113 	return rc;
2114 }
2115 
2116 static void *established_get_next(struct seq_file *seq, void *cur)
2117 {
2118 	struct sock *sk = cur;
2119 	struct inet_timewait_sock *tw;
2120 	struct hlist_nulls_node *node;
2121 	struct tcp_iter_state *st = seq->private;
2122 	struct net *net = seq_file_net(seq);
2123 
2124 	++st->num;
2125 	++st->offset;
2126 
2127 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2128 		tw = cur;
2129 		tw = tw_next(tw);
2130 get_tw:
2131 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2132 			tw = tw_next(tw);
2133 		}
2134 		if (tw) {
2135 			cur = tw;
2136 			goto out;
2137 		}
2138 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2139 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2140 
2141 		/* Look for the next non-empty bucket */
2142 		st->offset = 0;
2143 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2144 				empty_bucket(st))
2145 			;
2146 		if (st->bucket > tcp_hashinfo.ehash_mask)
2147 			return NULL;
2148 
2149 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2150 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2151 	} else
2152 		sk = sk_nulls_next(sk);
2153 
2154 	sk_nulls_for_each_from(sk, node) {
2155 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2156 			goto found;
2157 	}
2158 
2159 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2160 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2161 	goto get_tw;
2162 found:
2163 	cur = sk;
2164 out:
2165 	return cur;
2166 }
2167 
2168 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2169 {
2170 	struct tcp_iter_state *st = seq->private;
2171 	void *rc;
2172 
2173 	st->bucket = 0;
2174 	rc = established_get_first(seq);
2175 
2176 	while (rc && pos) {
2177 		rc = established_get_next(seq, rc);
2178 		--pos;
2179 	}
2180 	return rc;
2181 }
2182 
2183 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2184 {
2185 	void *rc;
2186 	struct tcp_iter_state *st = seq->private;
2187 
2188 	st->state = TCP_SEQ_STATE_LISTENING;
2189 	rc	  = listening_get_idx(seq, &pos);
2190 
2191 	if (!rc) {
2192 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2193 		rc	  = established_get_idx(seq, pos);
2194 	}
2195 
2196 	return rc;
2197 }
2198 
2199 static void *tcp_seek_last_pos(struct seq_file *seq)
2200 {
2201 	struct tcp_iter_state *st = seq->private;
2202 	int offset = st->offset;
2203 	int orig_num = st->num;
2204 	void *rc = NULL;
2205 
2206 	switch (st->state) {
2207 	case TCP_SEQ_STATE_OPENREQ:
2208 	case TCP_SEQ_STATE_LISTENING:
2209 		if (st->bucket >= INET_LHTABLE_SIZE)
2210 			break;
2211 		st->state = TCP_SEQ_STATE_LISTENING;
2212 		rc = listening_get_next(seq, NULL);
2213 		while (offset-- && rc)
2214 			rc = listening_get_next(seq, rc);
2215 		if (rc)
2216 			break;
2217 		st->bucket = 0;
2218 		/* Fallthrough */
2219 	case TCP_SEQ_STATE_ESTABLISHED:
2220 	case TCP_SEQ_STATE_TIME_WAIT:
2221 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2222 		if (st->bucket > tcp_hashinfo.ehash_mask)
2223 			break;
2224 		rc = established_get_first(seq);
2225 		while (offset-- && rc)
2226 			rc = established_get_next(seq, rc);
2227 	}
2228 
2229 	st->num = orig_num;
2230 
2231 	return rc;
2232 }
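
/*
 * Note (added for clarity): seq_file calls ->start() for every read()
 * chunk.  Without the saved st->last_pos/st->bucket/st->offset state,
 * each chunk would have to re-walk the hash tables from the beginning;
 * tcp_seek_last_pos() instead resumes at the remembered bucket and
 * offset, and tcp_seq_start() falls back to a full walk only when that
 * resume fails.
 */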
2233 
2234 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2235 {
2236 	struct tcp_iter_state *st = seq->private;
2237 	void *rc;
2238 
2239 	if (*pos && *pos == st->last_pos) {
2240 		rc = tcp_seek_last_pos(seq);
2241 		if (rc)
2242 			goto out;
2243 	}
2244 
2245 	st->state = TCP_SEQ_STATE_LISTENING;
2246 	st->num = 0;
2247 	st->bucket = 0;
2248 	st->offset = 0;
2249 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2250 
2251 out:
2252 	st->last_pos = *pos;
2253 	return rc;
2254 }
2255 
2256 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2257 {
2258 	struct tcp_iter_state *st = seq->private;
2259 	void *rc = NULL;
2260 
2261 	if (v == SEQ_START_TOKEN) {
2262 		rc = tcp_get_idx(seq, 0);
2263 		goto out;
2264 	}
2265 
2266 	switch (st->state) {
2267 	case TCP_SEQ_STATE_OPENREQ:
2268 	case TCP_SEQ_STATE_LISTENING:
2269 		rc = listening_get_next(seq, v);
2270 		if (!rc) {
2271 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2272 			st->bucket = 0;
2273 			st->offset = 0;
2274 			rc	  = established_get_first(seq);
2275 		}
2276 		break;
2277 	case TCP_SEQ_STATE_ESTABLISHED:
2278 	case TCP_SEQ_STATE_TIME_WAIT:
2279 		rc = established_get_next(seq, v);
2280 		break;
2281 	}
2282 out:
2283 	++*pos;
2284 	st->last_pos = *pos;
2285 	return rc;
2286 }
2287 
2288 static void tcp_seq_stop(struct seq_file *seq, void *v)
2289 {
2290 	struct tcp_iter_state *st = seq->private;
2291 
2292 	switch (st->state) {
2293 	case TCP_SEQ_STATE_OPENREQ:
2294 		if (v) {
2295 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2296 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2297 		}
2298 	case TCP_SEQ_STATE_LISTENING:
2299 		if (v != SEQ_START_TOKEN)
2300 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2301 		break;
2302 	case TCP_SEQ_STATE_TIME_WAIT:
2303 	case TCP_SEQ_STATE_ESTABLISHED:
2304 		if (v)
2305 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2306 		break;
2307 	}
2308 }
2309 
2310 static int tcp_seq_open(struct inode *inode, struct file *file)
2311 {
2312 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2313 	struct tcp_iter_state *s;
2314 	int err;
2315 
2316 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2317 			  sizeof(struct tcp_iter_state));
2318 	if (err < 0)
2319 		return err;
2320 
2321 	s = ((struct seq_file *)file->private_data)->private;
2322 	s->family		= afinfo->family;
2323 	s->last_pos 		= 0;
2324 	return 0;
2325 }
2326 
2327 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2328 {
2329 	int rc = 0;
2330 	struct proc_dir_entry *p;
2331 
2332 	afinfo->seq_fops.open		= tcp_seq_open;
2333 	afinfo->seq_fops.read		= seq_read;
2334 	afinfo->seq_fops.llseek		= seq_lseek;
2335 	afinfo->seq_fops.release	= seq_release_net;
2336 
2337 	afinfo->seq_ops.start		= tcp_seq_start;
2338 	afinfo->seq_ops.next		= tcp_seq_next;
2339 	afinfo->seq_ops.stop		= tcp_seq_stop;
2340 
2341 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2342 			     &afinfo->seq_fops, afinfo);
2343 	if (!p)
2344 		rc = -ENOMEM;
2345 	return rc;
2346 }
2347 EXPORT_SYMBOL(tcp_proc_register);
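
/*
 * Registration sketch (illustration only; all "example_*" names are
 * hypothetical): a user of this interface supplies its own
 * tcp_seq_afinfo and show() routine and registers it per network
 * namespace, exactly as tcp4_proc_init_net() does below with
 * tcp4_seq_afinfo.
 */
static int example_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "example header\n");
	/* per-socket entries ignored for brevity */
	return 0;
}

static struct tcp_seq_afinfo example_seq_afinfo = {
	.name		= "tcp_example",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= example_seq_show,
	},
};

static int __net_init example_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &example_seq_afinfo);
}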
2348 
2349 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2350 {
2351 	proc_net_remove(net, afinfo->name);
2352 }
2353 EXPORT_SYMBOL(tcp_proc_unregister);
2354 
2355 static void get_openreq4(struct sock *sk, struct request_sock *req,
2356 			 struct seq_file *f, int i, int uid, int *len)
2357 {
2358 	const struct inet_request_sock *ireq = inet_rsk(req);
2359 	int ttd = req->expires - jiffies;
2360 
2361 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2362 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2363 		i,
2364 		ireq->loc_addr,
2365 		ntohs(inet_sk(sk)->inet_sport),
2366 		ireq->rmt_addr,
2367 		ntohs(ireq->rmt_port),
2368 		TCP_SYN_RECV,
2369 		0, 0, /* could print option size, but that is af dependent. */
2370 		1,    /* timers active (only the expire timer) */
2371 		jiffies_to_clock_t(ttd),
2372 		req->retrans,
2373 		uid,
2374 		0,  /* non-standard timer */
2375 		0, /* open_requests have no inode */
2376 		atomic_read(&sk->sk_refcnt),
2377 		req,
2378 		len);
2379 }
2380 
2381 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2382 {
2383 	int timer_active;
2384 	unsigned long timer_expires;
2385 	struct tcp_sock *tp = tcp_sk(sk);
2386 	const struct inet_connection_sock *icsk = inet_csk(sk);
2387 	struct inet_sock *inet = inet_sk(sk);
2388 	__be32 dest = inet->inet_daddr;
2389 	__be32 src = inet->inet_rcv_saddr;
2390 	__u16 destp = ntohs(inet->inet_dport);
2391 	__u16 srcp = ntohs(inet->inet_sport);
2392 	int rx_queue;
2393 
2394 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2395 		timer_active	= 1;
2396 		timer_expires	= icsk->icsk_timeout;
2397 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2398 		timer_active	= 4;
2399 		timer_expires	= icsk->icsk_timeout;
2400 	} else if (timer_pending(&sk->sk_timer)) {
2401 		timer_active	= 2;
2402 		timer_expires	= sk->sk_timer.expires;
2403 	} else {
2404 		timer_active	= 0;
2405 		timer_expires = jiffies;
2406 	}
2407 
2408 	if (sk->sk_state == TCP_LISTEN)
2409 		rx_queue = sk->sk_ack_backlog;
2410 	else
2411 		/*
2412 		 * Because we don't lock the socket, we might find a transient negative value
2413 		 */
2414 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2415 
2416 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2417 			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2418 		i, src, srcp, dest, destp, sk->sk_state,
2419 		tp->write_seq - tp->snd_una,
2420 		rx_queue,
2421 		timer_active,
2422 		jiffies_to_clock_t(timer_expires - jiffies),
2423 		icsk->icsk_retransmits,
2424 		sock_i_uid(sk),
2425 		icsk->icsk_probes_out,
2426 		sock_i_ino(sk),
2427 		atomic_read(&sk->sk_refcnt), sk,
2428 		jiffies_to_clock_t(icsk->icsk_rto),
2429 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2430 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2431 		tp->snd_cwnd,
2432 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2433 		len);
2434 }
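
/*
 * Illustrative /proc/net/tcp entry produced by the format above (all
 * values invented for the example): a socket listening on 127.0.0.1:631,
 * state 0A (TCP_LISTEN), uid 0, inode 12345.  Addresses are printed as
 * raw 32-bit hex, so 127.0.0.1 shows up as 0100007F on a little-endian
 * host:
 *
 *   0: 0100007F:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 ffff88003ba1c0c0 100 0 0 10 -1
 */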
2435 
2436 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2437 			       struct seq_file *f, int i, int *len)
2438 {
2439 	__be32 dest, src;
2440 	__u16 destp, srcp;
2441 	int ttd = tw->tw_ttd - jiffies;
2442 
2443 	if (ttd < 0)
2444 		ttd = 0;
2445 
2446 	dest  = tw->tw_daddr;
2447 	src   = tw->tw_rcv_saddr;
2448 	destp = ntohs(tw->tw_dport);
2449 	srcp  = ntohs(tw->tw_sport);
2450 
2451 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2452 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2453 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2454 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2455 		atomic_read(&tw->tw_refcnt), tw, len);
2456 }
2457 
2458 #define TMPSZ 150
2459 
2460 static int tcp4_seq_show(struct seq_file *seq, void *v)
2461 {
2462 	struct tcp_iter_state *st;
2463 	int len;
2464 
2465 	if (v == SEQ_START_TOKEN) {
2466 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2467 			   "  sl  local_address rem_address   st tx_queue "
2468 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2469 			   "inode");
2470 		goto out;
2471 	}
2472 	st = seq->private;
2473 
2474 	switch (st->state) {
2475 	case TCP_SEQ_STATE_LISTENING:
2476 	case TCP_SEQ_STATE_ESTABLISHED:
2477 		get_tcp4_sock(v, seq, st->num, &len);
2478 		break;
2479 	case TCP_SEQ_STATE_OPENREQ:
2480 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2481 		break;
2482 	case TCP_SEQ_STATE_TIME_WAIT:
2483 		get_timewait4_sock(v, seq, st->num, &len);
2484 		break;
2485 	}
2486 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2487 out:
2488 	return 0;
2489 }
2490 
2491 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2492 	.name		= "tcp",
2493 	.family		= AF_INET,
2494 	.seq_fops	= {
2495 		.owner		= THIS_MODULE,
2496 	},
2497 	.seq_ops	= {
2498 		.show		= tcp4_seq_show,
2499 	},
2500 };
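
/*
 * Userspace-side illustration (not kernel code; the function name is
 * hypothetical): the afinfo above backs /proc/net/tcp, so the whole
 * seq_file state machine is exercised by a plain sequential read:
 */
#include <stdio.h>

static int example_dump_proc_net_tcp(void)
{
	char line[256];
	FILE *f = fopen("/proc/net/tcp", "r");

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}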
2501 
2502 static int __net_init tcp4_proc_init_net(struct net *net)
2503 {
2504 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2505 }
2506 
2507 static void __net_exit tcp4_proc_exit_net(struct net *net)
2508 {
2509 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2510 }
2511 
2512 static struct pernet_operations tcp4_net_ops = {
2513 	.init = tcp4_proc_init_net,
2514 	.exit = tcp4_proc_exit_net,
2515 };
2516 
2517 int __init tcp4_proc_init(void)
2518 {
2519 	return register_pernet_subsys(&tcp4_net_ops);
2520 }
2521 
2522 void tcp4_proc_exit(void)
2523 {
2524 	unregister_pernet_subsys(&tcp4_net_ops);
2525 }
2526 #endif /* CONFIG_PROC_FS */
2527 
2528 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2529 {
2530 	struct iphdr *iph = skb_gro_network_header(skb);
2531 
2532 	switch (skb->ip_summed) {
2533 	case CHECKSUM_COMPLETE:
2534 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2535 				  skb->csum)) {
2536 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2537 			break;
2538 		}
2539 
2540 		/* fall through */
2541 	case CHECKSUM_NONE:
2542 		NAPI_GRO_CB(skb)->flush = 1;
2543 		return NULL;
2544 	}
2545 
2546 	return tcp_gro_receive(head, skb);
2547 }
2548 
2549 int tcp4_gro_complete(struct sk_buff *skb)
2550 {
2551 	struct iphdr *iph = ip_hdr(skb);
2552 	struct tcphdr *th = tcp_hdr(skb);
2553 
2554 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2555 				  iph->saddr, iph->daddr, 0);
2556 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2557 
2558 	return tcp_gro_complete(skb);
2559 }
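
/*
 * Note (added for clarity): tcp4_gro_complete() primes th->check with the
 * complemented pseudo-header checksum of the merged super-packet and tags
 * it SKB_GSO_TCPV4, so the rest of the stack can treat it like a locally
 * generated GSO skb (resegmenting it or finishing the checksum later).
 */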
2560 
2561 struct proto tcp_prot = {
2562 	.name			= "TCP",
2563 	.owner			= THIS_MODULE,
2564 	.close			= tcp_close,
2565 	.connect		= tcp_v4_connect,
2566 	.disconnect		= tcp_disconnect,
2567 	.accept			= inet_csk_accept,
2568 	.ioctl			= tcp_ioctl,
2569 	.init			= tcp_v4_init_sock,
2570 	.destroy		= tcp_v4_destroy_sock,
2571 	.shutdown		= tcp_shutdown,
2572 	.setsockopt		= tcp_setsockopt,
2573 	.getsockopt		= tcp_getsockopt,
2574 	.recvmsg		= tcp_recvmsg,
2575 	.sendmsg		= tcp_sendmsg,
2576 	.sendpage		= tcp_sendpage,
2577 	.backlog_rcv		= tcp_v4_do_rcv,
2578 	.hash			= inet_hash,
2579 	.unhash			= inet_unhash,
2580 	.get_port		= inet_csk_get_port,
2581 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2582 	.sockets_allocated	= &tcp_sockets_allocated,
2583 	.orphan_count		= &tcp_orphan_count,
2584 	.memory_allocated	= &tcp_memory_allocated,
2585 	.memory_pressure	= &tcp_memory_pressure,
2586 	.sysctl_mem		= sysctl_tcp_mem,
2587 	.sysctl_wmem		= sysctl_tcp_wmem,
2588 	.sysctl_rmem		= sysctl_tcp_rmem,
2589 	.max_header		= MAX_TCP_HEADER,
2590 	.obj_size		= sizeof(struct tcp_sock),
2591 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2592 	.twsk_prot		= &tcp_timewait_sock_ops,
2593 	.rsk_prot		= &tcp_request_sock_ops,
2594 	.h.hashinfo		= &tcp_hashinfo,
2595 	.no_autobind		= true,
2596 #ifdef CONFIG_COMPAT
2597 	.compat_setsockopt	= compat_tcp_setsockopt,
2598 	.compat_getsockopt	= compat_tcp_getsockopt,
2599 #endif
2600 };
2601 EXPORT_SYMBOL(tcp_prot);
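
/*
 * Hook-up sketch (illustration only, modeled on the entry in
 * net/ipv4/af_inet.c; the "example_" name is hypothetical): tcp_prot is
 * attached to SOCK_STREAM/IPPROTO_TCP sockets through an inet_protosw
 * entry, which pairs it with the generic inet_stream_ops and is
 * registered with inet_register_protosw().
 */
static struct inet_protosw example_tcp_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcp_prot,
	.ops		= &inet_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK,
};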
2602 
2603 
2604 static int __net_init tcp_sk_init(struct net *net)
2605 {
2606 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2607 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2608 }
2609 
2610 static void __net_exit tcp_sk_exit(struct net *net)
2611 {
2612 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2613 }
2614 
2615 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2616 {
2617 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2618 }
2619 
2620 static struct pernet_operations __net_initdata tcp_sk_ops = {
2621        .init	   = tcp_sk_init,
2622        .exit	   = tcp_sk_exit,
2623        .exit_batch = tcp_sk_exit_batch,
2624 };
2625 
2626 void __init tcp_v4_init(void)
2627 {
2628 	inet_hashinfo_init(&tcp_hashinfo);
2629 	if (register_pernet_subsys(&tcp_sk_ops))
2630 		panic("Failed to create the TCP control socket.\n");
2631 }
2632