xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision a6ff1a2f1e91578860b37df9fd861ef7af207de4)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78 
79 #include <linux/inet.h>
80 #include <linux/ipv6.h>
81 #include <linux/stddef.h>
82 #include <linux/proc_fs.h>
83 #include <linux/seq_file.h>
84 
85 #include <linux/crypto.h>
86 #include <linux/scatterlist.h>
87 
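/* Both knobs below are exposed via /proc/sys/net/ipv4/ as tcp_tw_reuse
 * and tcp_low_latency.
 */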
88 int sysctl_tcp_tw_reuse __read_mostly;
89 int sysctl_tcp_low_latency __read_mostly;
90 EXPORT_SYMBOL(sysctl_tcp_low_latency);
91 
92 
93 #ifdef CONFIG_TCP_MD5SIG
94 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96 #endif
97 
98 struct inet_hashinfo tcp_hashinfo;
99 EXPORT_SYMBOL(tcp_hashinfo);
100 
101 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102 {
103 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 					  ip_hdr(skb)->saddr,
105 					  tcp_hdr(skb)->dest,
106 					  tcp_hdr(skb)->source);
107 }
108 
109 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110 {
111 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 	struct tcp_sock *tp = tcp_sk(sk);
113 
114 	/* With PAWS, it is safe from the viewpoint
115 	   of data integrity. Even without PAWS it is safe provided sequence
116 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117 
118 	   Actually, the idea is close to VJ's: only the timestamp cache is
119 	   held not per host but per port pair, and the TW bucket is used as
120 	   the state holder.
121 
122 	   If the TW bucket has already been destroyed we fall back to VJ's
123 	   scheme and use the initial timestamp retrieved from the peer table.
124 	 */
125 	if (tcptw->tw_ts_recent_stamp &&
126 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
127 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
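		/* Start the new connection's sequence space just past the old
		 * incarnation's snd_nxt so the two cannot overlap.
		 */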
128 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 		if (tp->write_seq == 0)
130 			tp->write_seq = 1;
131 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
132 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 		sock_hold(sktw);
134 		return 1;
135 	}
136 
137 	return 0;
138 }
139 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140 
141 static int tcp_repair_connect(struct sock *sk)
142 {
143 	tcp_connect_init(sk);
144 	tcp_finish_connect(sk, NULL);
145 
146 	return 0;
147 }
148 
149 /* This will initiate an outgoing connection. */
150 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151 {
152 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
153 	struct inet_sock *inet = inet_sk(sk);
154 	struct tcp_sock *tp = tcp_sk(sk);
155 	__be16 orig_sport, orig_dport;
156 	__be32 daddr, nexthop;
157 	struct flowi4 *fl4;
158 	struct rtable *rt;
159 	int err;
160 	struct ip_options_rcu *inet_opt;
161 
162 	if (addr_len < sizeof(struct sockaddr_in))
163 		return -EINVAL;
164 
165 	if (usin->sin_family != AF_INET)
166 		return -EAFNOSUPPORT;
167 
168 	nexthop = daddr = usin->sin_addr.s_addr;
169 	inet_opt = rcu_dereference_protected(inet->inet_opt,
170 					     sock_owned_by_user(sk));
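	/* With an IP source route option, route to the first hop (faddr)
	 * rather than to the final destination.
	 */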
171 	if (inet_opt && inet_opt->opt.srr) {
172 		if (!daddr)
173 			return -EINVAL;
174 		nexthop = inet_opt->opt.faddr;
175 	}
176 
177 	orig_sport = inet->inet_sport;
178 	orig_dport = usin->sin_port;
179 	fl4 = &inet->cork.fl.u.ip4;
180 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
181 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
182 			      IPPROTO_TCP,
183 			      orig_sport, orig_dport, sk, true);
184 	if (IS_ERR(rt)) {
185 		err = PTR_ERR(rt);
186 		if (err == -ENETUNREACH)
187 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188 		return err;
189 	}
190 
191 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
192 		ip_rt_put(rt);
193 		return -ENETUNREACH;
194 	}
195 
196 	if (!inet_opt || !inet_opt->opt.srr)
197 		daddr = fl4->daddr;
198 
199 	if (!inet->inet_saddr)
200 		inet->inet_saddr = fl4->saddr;
201 	inet->inet_rcv_saddr = inet->inet_saddr;
202 
203 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
204 		/* Reset inherited state */
205 		tp->rx_opt.ts_recent	   = 0;
206 		tp->rx_opt.ts_recent_stamp = 0;
207 		if (likely(!tp->repair))
208 			tp->write_seq	   = 0;
209 	}
210 
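	/* With tw_recycle, seed ts_recent from the timestamp state cached
	 * for this destination when a previous connection went through
	 * TIME-WAIT.
	 */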
211 	if (tcp_death_row.sysctl_tw_recycle &&
212 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
213 		tcp_fetch_timewait_stamp(sk, &rt->dst);
214 
215 	inet->inet_dport = usin->sin_port;
216 	inet->inet_daddr = daddr;
217 
218 	inet_csk(sk)->icsk_ext_hdr_len = 0;
219 	if (inet_opt)
220 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
221 
222 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
223 
224 	/* Socket identity is still unknown (sport may be zero).
225 	 * However we set state to SYN-SENT and, without releasing the socket
226 	 * lock, select a source port, enter ourselves into the hash tables and
227 	 * complete initialization after this.
228 	 */
229 	tcp_set_state(sk, TCP_SYN_SENT);
230 	err = inet_hash_connect(&tcp_death_row, sk);
231 	if (err)
232 		goto failure;
233 
234 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
235 			       inet->inet_sport, inet->inet_dport, sk);
236 	if (IS_ERR(rt)) {
237 		err = PTR_ERR(rt);
238 		rt = NULL;
239 		goto failure;
240 	}
241 	/* OK, now commit destination to socket.  */
242 	sk->sk_gso_type = SKB_GSO_TCPV4;
243 	sk_setup_caps(sk, &rt->dst);
244 
245 	if (!tp->write_seq && likely(!tp->repair))
246 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
247 							   inet->inet_daddr,
248 							   inet->inet_sport,
249 							   usin->sin_port);
250 
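	/* Seed the IP identification counter from the initial sequence
	 * number.
	 */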
251 	inet->inet_id = tp->write_seq ^ jiffies;
252 
253 	if (likely(!tp->repair))
254 		err = tcp_connect(sk);
255 	else
256 		err = tcp_repair_connect(sk);
257 
258 	rt = NULL;
259 	if (err)
260 		goto failure;
261 
262 	return 0;
263 
264 failure:
265 	/*
266 	 * This unhashes the socket and releases the local port,
267 	 * if necessary.
268 	 */
269 	tcp_set_state(sk, TCP_CLOSE);
270 	ip_rt_put(rt);
271 	sk->sk_route_caps = 0;
272 	inet->inet_dport = 0;
273 	return err;
274 }
275 EXPORT_SYMBOL(tcp_v4_connect);
276 
277 /*
278  * This routine does path mtu discovery as defined in RFC1191.
279  */
280 static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
281 {
282 	struct dst_entry *dst;
283 	struct inet_sock *inet = inet_sk(sk);
284 
285 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
286 	 * sent out by Linux are always < 576 bytes, so they should go through
287 	 * unfragmented).
288 	 */
289 	if (sk->sk_state == TCP_LISTEN)
290 		return;
291 
292 	dst = inet_csk_update_pmtu(sk, mtu);
293 	if (!dst)
294 		return;
295 
296 	/* Something is about to go wrong... Remember the soft error
297 	 * in case this connection is not able to recover.
298 	 */
299 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
300 		sk->sk_err_soft = EMSGSIZE;
301 
302 	mtu = dst_mtu(dst);
303 
304 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
305 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
306 		tcp_sync_mss(sk, mtu);
307 
308 		/* Resend the TCP packet because it's
309 		 * clear that the old packet has been
310 		 * dropped. This is the new "fast" path mtu
311 		 * discovery.
312 		 */
313 		tcp_simple_retransmit(sk);
314 	} /* else let the usual retransmit timer handle it */
315 }
316 
317 static void do_redirect(struct sk_buff *skb, struct sock *sk)
318 {
319 	struct dst_entry *dst = __sk_dst_check(sk, 0);
320 
321 	if (dst)
322 		dst->ops->redirect(dst, sk, skb);
323 }
324 
325 /*
326  * This routine is called by the ICMP module when it gets some
327  * sort of error condition.  If err < 0 then the socket should
328  * be closed and the error returned to the user.  If err > 0
329  * it's just the icmp type << 8 | icmp code.  After adjustment,
330  * header points to the first 8 bytes of the tcp header.  We need
331  * to find the appropriate port.
332  *
333  * The locking strategy used here is very "optimistic". When
334  * someone else accesses the socket the ICMP is just dropped
335  * and for some paths there is no check at all.
336  * A more general error queue to queue errors for later handling
337  * is probably better.
338  *
339  */
340 
341 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
342 {
343 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
344 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
345 	struct inet_connection_sock *icsk;
346 	struct tcp_sock *tp;
347 	struct inet_sock *inet;
348 	const int type = icmp_hdr(icmp_skb)->type;
349 	const int code = icmp_hdr(icmp_skb)->code;
350 	struct sock *sk;
351 	struct sk_buff *skb;
352 	__u32 seq;
353 	__u32 remaining;
354 	int err;
355 	struct net *net = dev_net(icmp_skb->dev);
356 
357 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
358 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
359 		return;
360 	}
361 
362 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
363 			iph->saddr, th->source, inet_iif(icmp_skb));
364 	if (!sk) {
365 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
366 		return;
367 	}
368 	if (sk->sk_state == TCP_TIME_WAIT) {
369 		inet_twsk_put(inet_twsk(sk));
370 		return;
371 	}
372 
373 	bh_lock_sock(sk);
374 	/* If too many ICMPs get dropped on busy
375 	 * servers this needs to be solved differently.
376 	 */
377 	if (sock_owned_by_user(sk))
378 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
379 
380 	if (sk->sk_state == TCP_CLOSE)
381 		goto out;
382 
383 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
384 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
385 		goto out;
386 	}
387 
388 	icsk = inet_csk(sk);
389 	tp = tcp_sk(sk);
390 	seq = ntohl(th->seq);
391 	if (sk->sk_state != TCP_LISTEN &&
392 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
393 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
394 		goto out;
395 	}
396 
397 	switch (type) {
398 	case ICMP_REDIRECT:
399 		do_redirect(icmp_skb, sk);
400 		goto out;
401 	case ICMP_SOURCE_QUENCH:
402 		/* Just silently ignore these. */
403 		goto out;
404 	case ICMP_PARAMETERPROB:
405 		err = EPROTO;
406 		break;
407 	case ICMP_DEST_UNREACH:
408 		if (code > NR_ICMP_UNREACH)
409 			goto out;
410 
411 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
412 			if (!sock_owned_by_user(sk))
413 				do_pmtu_discovery(sk, iph, info);
414 			goto out;
415 		}
416 
417 		err = icmp_err_convert[code].errno;
418 		/* check if icmp_skb allows revert of backoff
419 		 * (see draft-zimmermann-tcp-lcd) */
420 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
421 			break;
422 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
423 		    !icsk->icsk_backoff)
424 			break;
425 
426 		if (sock_owned_by_user(sk))
427 			break;
428 
429 		icsk->icsk_backoff--;
430 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
431 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
432 		tcp_bound_rto(sk);
433 
434 		skb = tcp_write_queue_head(sk);
435 		BUG_ON(!skb);
436 
437 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
438 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
439 
440 		if (remaining) {
441 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
442 						  remaining, TCP_RTO_MAX);
443 		} else {
444 			/* RTO revert clocked out retransmission.
445 			 * Will retransmit now */
446 			tcp_retransmit_timer(sk);
447 		}
448 
449 		break;
450 	case ICMP_TIME_EXCEEDED:
451 		err = EHOSTUNREACH;
452 		break;
453 	default:
454 		goto out;
455 	}
456 
457 	switch (sk->sk_state) {
458 		struct request_sock *req, **prev;
459 	case TCP_LISTEN:
460 		if (sock_owned_by_user(sk))
461 			goto out;
462 
463 		req = inet_csk_search_req(sk, &prev, th->dest,
464 					  iph->daddr, iph->saddr);
465 		if (!req)
466 			goto out;
467 
468 		/* ICMPs are not backlogged, hence we cannot get
469 		   an established socket here.
470 		 */
471 		WARN_ON(req->sk);
472 
473 		if (seq != tcp_rsk(req)->snt_isn) {
474 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
475 			goto out;
476 		}
477 
478 		/*
479 		 * Still in SYN_RECV, just remove it silently.
480 		 * There is no good way to pass the error to the newly
481 		 * created socket, and POSIX does not want network
482 		 * errors returned from accept().
483 		 */
484 		inet_csk_reqsk_queue_drop(sk, req, prev);
485 		goto out;
486 
487 	case TCP_SYN_SENT:
488 	case TCP_SYN_RECV:  /* Cannot happen.
489 			       It can, for example, if SYNs crossed.
490 			     */
491 		if (!sock_owned_by_user(sk)) {
492 			sk->sk_err = err;
493 
494 			sk->sk_error_report(sk);
495 
496 			tcp_done(sk);
497 		} else {
498 			sk->sk_err_soft = err;
499 		}
500 		goto out;
501 	}
502 
503 	/* If we've already connected we will keep trying
504 	 * until we time out, or the user gives up.
505 	 *
506  * rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH to be
507  * considered hard errors (well, FRAG_FAILED too,
508  * but it is obsoleted by pmtu discovery).
509  *
510  * Note that in the modern internet, where routing is unreliable
511  * and broken firewalls sit in every dark corner, sending random
512  * errors ordered by their masters, even these two messages finally lose
513  * their original sense (even Linux sends invalid PORT_UNREACHs).
514 	 *
515 	 * Now we are in compliance with RFCs.
516 	 *							--ANK (980905)
517 	 */
518 
519 	inet = inet_sk(sk);
520 	if (!sock_owned_by_user(sk) && inet->recverr) {
521 		sk->sk_err = err;
522 		sk->sk_error_report(sk);
523 	} else	{ /* Only an error on timeout */
524 		sk->sk_err_soft = err;
525 	}
526 
527 out:
528 	bh_unlock_sock(sk);
529 	sock_put(sk);
530 }
531 
532 static void __tcp_v4_send_check(struct sk_buff *skb,
533 				__be32 saddr, __be32 daddr)
534 {
535 	struct tcphdr *th = tcp_hdr(skb);
536 
537 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
538 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
539 		skb->csum_start = skb_transport_header(skb) - skb->head;
540 		skb->csum_offset = offsetof(struct tcphdr, check);
541 	} else {
542 		th->check = tcp_v4_check(skb->len, saddr, daddr,
543 					 csum_partial(th,
544 						      th->doff << 2,
545 						      skb->csum));
546 	}
547 }
548 
549 /* This routine computes an IPv4 TCP checksum. */
550 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
551 {
552 	const struct inet_sock *inet = inet_sk(sk);
553 
554 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
555 }
556 EXPORT_SYMBOL(tcp_v4_send_check);
557 
558 int tcp_v4_gso_send_check(struct sk_buff *skb)
559 {
560 	const struct iphdr *iph;
561 	struct tcphdr *th;
562 
563 	if (!pskb_may_pull(skb, sizeof(*th)))
564 		return -EINVAL;
565 
566 	iph = ip_hdr(skb);
567 	th = tcp_hdr(skb);
568 
569 	th->check = 0;
570 	skb->ip_summed = CHECKSUM_PARTIAL;
571 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
572 	return 0;
573 }
574 
575 /*
576  *	This routine will send an RST to the other tcp.
577  *
578  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
579  *		      for the reset?
580  *	Answer: if a packet caused the RST, it is not for a socket
581  *		existing in our system; if it is matched to a socket,
582  *		it is just a duplicate segment or a bug in the other
583  *		side's TCP.  So we build the reply based only on the
584  *		parameters that arrived with the segment.
585  *	Exception: precedence violation. We do not implement it in any case.
586  */
587 
588 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
589 {
590 	const struct tcphdr *th = tcp_hdr(skb);
591 	struct {
592 		struct tcphdr th;
593 #ifdef CONFIG_TCP_MD5SIG
594 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
595 #endif
596 	} rep;
597 	struct ip_reply_arg arg;
598 #ifdef CONFIG_TCP_MD5SIG
599 	struct tcp_md5sig_key *key;
600 	const __u8 *hash_location = NULL;
601 	unsigned char newhash[16];
602 	int genhash;
603 	struct sock *sk1 = NULL;
604 #endif
605 	struct net *net;
606 
607 	/* Never send a reset in response to a reset. */
608 	if (th->rst)
609 		return;
610 
611 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
612 		return;
613 
614 	/* Swap the send and the receive. */
615 	memset(&rep, 0, sizeof(rep));
616 	rep.th.dest   = th->source;
617 	rep.th.source = th->dest;
618 	rep.th.doff   = sizeof(struct tcphdr) / 4;
619 	rep.th.rst    = 1;
620 
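	/* Per RFC 793: if the incoming segment carries an ACK, the RST takes
	 * its sequence number from that ACK field; otherwise the RST itself
	 * ACKs the offending segment.
	 */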
621 	if (th->ack) {
622 		rep.th.seq = th->ack_seq;
623 	} else {
624 		rep.th.ack = 1;
625 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
626 				       skb->len - (th->doff << 2));
627 	}
628 
629 	memset(&arg, 0, sizeof(arg));
630 	arg.iov[0].iov_base = (unsigned char *)&rep;
631 	arg.iov[0].iov_len  = sizeof(rep.th);
632 
633 #ifdef CONFIG_TCP_MD5SIG
634 	hash_location = tcp_parse_md5sig_option(th);
635 	if (!sk && hash_location) {
636 		/*
637 		 * The active side is lost. Try to find the listening socket
638 		 * through the source port, and then find the md5 key through
639 		 * the listening socket. We do not lose security here:
640 		 * the incoming packet is checked against the md5 hash of the
641 		 * found key, and no RST is generated if the hash doesn't match.
642 		 */
643 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
644 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
645 					     ntohs(th->source), inet_iif(skb));
646 		/* don't send an RST if we can't find a key */
647 		if (!sk1)
648 			return;
649 		rcu_read_lock();
650 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
651 					&ip_hdr(skb)->saddr, AF_INET);
652 		if (!key)
653 			goto release_sk1;
654 
655 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
656 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
657 			goto release_sk1;
658 	} else {
659 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
660 					     &ip_hdr(skb)->saddr,
661 					     AF_INET) : NULL;
662 	}
663 
664 	if (key) {
665 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
666 				   (TCPOPT_NOP << 16) |
667 				   (TCPOPT_MD5SIG << 8) |
668 				   TCPOLEN_MD5SIG);
669 		/* Update length and the length the header thinks exists */
670 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
671 		rep.th.doff = arg.iov[0].iov_len / 4;
672 
673 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
674 				     key, ip_hdr(skb)->saddr,
675 				     ip_hdr(skb)->daddr, &rep.th);
676 	}
677 #endif
678 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
679 				      ip_hdr(skb)->saddr, /* XXX */
680 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
681 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
682 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
683 	/* When the socket is gone, all binding information is lost.
684 	 * Routing might fail in this case. Use iif for oif to
685 	 * make sure we can deliver it.
686 	 */
687 	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
688 
689 	net = dev_net(skb_dst(skb)->dev);
690 	arg.tos = ip_hdr(skb)->tos;
691 	ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
692 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
693 
694 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
695 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
696 
697 #ifdef CONFIG_TCP_MD5SIG
698 release_sk1:
699 	if (sk1) {
700 		rcu_read_unlock();
701 		sock_put(sk1);
702 	}
703 #endif
704 }
705 
706 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
707    outside socket context, is certainly ugly. What can I do?
708  */
709 
710 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
711 			    u32 win, u32 ts, int oif,
712 			    struct tcp_md5sig_key *key,
713 			    int reply_flags, u8 tos)
714 {
715 	const struct tcphdr *th = tcp_hdr(skb);
716 	struct {
717 		struct tcphdr th;
718 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
719 #ifdef CONFIG_TCP_MD5SIG
720 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
721 #endif
722 			];
723 	} rep;
724 	struct ip_reply_arg arg;
725 	struct net *net = dev_net(skb_dst(skb)->dev);
726 
727 	memset(&rep.th, 0, sizeof(struct tcphdr));
728 	memset(&arg, 0, sizeof(arg));
729 
730 	arg.iov[0].iov_base = (unsigned char *)&rep;
731 	arg.iov[0].iov_len  = sizeof(rep.th);
732 	if (ts) {
733 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
734 				   (TCPOPT_TIMESTAMP << 8) |
735 				   TCPOLEN_TIMESTAMP);
736 		rep.opt[1] = htonl(tcp_time_stamp);
737 		rep.opt[2] = htonl(ts);
738 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
739 	}
740 
741 	/* Swap the send and the receive. */
742 	rep.th.dest    = th->source;
743 	rep.th.source  = th->dest;
744 	rep.th.doff    = arg.iov[0].iov_len / 4;
745 	rep.th.seq     = htonl(seq);
746 	rep.th.ack_seq = htonl(ack);
747 	rep.th.ack     = 1;
748 	rep.th.window  = htons(win);
749 
750 #ifdef CONFIG_TCP_MD5SIG
751 	if (key) {
752 		int offset = (ts) ? 3 : 0;
753 
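		/* The MD5 option is placed after the three 32-bit timestamp
		 * words when a timestamp was emitted above.
		 */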
754 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
755 					  (TCPOPT_NOP << 16) |
756 					  (TCPOPT_MD5SIG << 8) |
757 					  TCPOLEN_MD5SIG);
758 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
759 		rep.th.doff = arg.iov[0].iov_len/4;
760 
761 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
762 				    key, ip_hdr(skb)->saddr,
763 				    ip_hdr(skb)->daddr, &rep.th);
764 	}
765 #endif
766 	arg.flags = reply_flags;
767 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
768 				      ip_hdr(skb)->saddr, /* XXX */
769 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
770 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
771 	if (oif)
772 		arg.bound_dev_if = oif;
773 	arg.tos = tos;
774 	ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
775 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
776 
777 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
778 }
779 
780 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
781 {
782 	struct inet_timewait_sock *tw = inet_twsk(sk);
783 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
784 
785 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
786 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
787 			tcptw->tw_ts_recent,
788 			tw->tw_bound_dev_if,
789 			tcp_twsk_md5_key(tcptw),
790 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
791 			tw->tw_tos
792 			);
793 
794 	inet_twsk_put(tw);
795 }
796 
797 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
798 				  struct request_sock *req)
799 {
800 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
801 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
802 			req->ts_recent,
803 			0,
804 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
805 					  AF_INET),
806 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
807 			ip_hdr(skb)->tos);
808 }
809 
810 /*
811  *	Send a SYN-ACK after having received a SYN.
812  *	This still operates on a request_sock only, not on a big
813  *	socket.
814  */
815 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
816 			      struct request_sock *req,
817 			      struct request_values *rvp,
818 			      u16 queue_mapping,
819 			      bool nocache)
820 {
821 	const struct inet_request_sock *ireq = inet_rsk(req);
822 	struct flowi4 fl4;
823 	int err = -1;
824 	struct sk_buff * skb;
825 
826 	/* First, grab a route. */
827 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
828 		return -1;
829 
830 	skb = tcp_make_synack(sk, dst, req, rvp);
831 
832 	if (skb) {
833 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
834 
835 		skb_set_queue_mapping(skb, queue_mapping);
836 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
837 					    ireq->rmt_addr,
838 					    ireq->opt);
839 		err = net_xmit_eval(err);
840 	}
841 
842 	return err;
843 }
844 
845 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
846 			      struct request_values *rvp)
847 {
848 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
849 	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
850 }
851 
852 /*
853  *	IPv4 request_sock destructor.
854  */
855 static void tcp_v4_reqsk_destructor(struct request_sock *req)
856 {
857 	kfree(inet_rsk(req)->opt);
858 }
859 
860 /*
861  * Return true if a syncookie should be sent
862  */
863 bool tcp_syn_flood_action(struct sock *sk,
864 			 const struct sk_buff *skb,
865 			 const char *proto)
866 {
867 	const char *msg = "Dropping request";
868 	bool want_cookie = false;
869 	struct listen_sock *lopt;
870 
871 
872 
873 #ifdef CONFIG_SYN_COOKIES
874 	if (sysctl_tcp_syncookies) {
875 		msg = "Sending cookies";
876 		want_cookie = true;
877 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
878 	} else
879 #endif
880 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
881 
882 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
883 	if (!lopt->synflood_warned) {
884 		lopt->synflood_warned = 1;
885 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
886 			proto, ntohs(tcp_hdr(skb)->dest), msg);
887 	}
888 	return want_cookie;
889 }
890 EXPORT_SYMBOL(tcp_syn_flood_action);
891 
892 /*
893  * Save and compile IPv4 options into the request_sock if needed.
894  */
895 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
896 						  struct sk_buff *skb)
897 {
898 	const struct ip_options *opt = &(IPCB(skb)->opt);
899 	struct ip_options_rcu *dopt = NULL;
900 
901 	if (opt && opt->optlen) {
902 		int opt_size = sizeof(*dopt) + opt->optlen;
903 
904 		dopt = kmalloc(opt_size, GFP_ATOMIC);
905 		if (dopt) {
906 			if (ip_options_echo(&dopt->opt, skb)) {
907 				kfree(dopt);
908 				dopt = NULL;
909 			}
910 		}
911 	}
912 	return dopt;
913 }
914 
915 #ifdef CONFIG_TCP_MD5SIG
916 /*
917  * RFC2385 MD5 checksumming requires a mapping of
918  * IP address->MD5 Key.
919  * We need to maintain these in the sk structure.
920  */
921 
922 /* Find the Key structure for an address.  */
923 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
924 					 const union tcp_md5_addr *addr,
925 					 int family)
926 {
927 	struct tcp_sock *tp = tcp_sk(sk);
928 	struct tcp_md5sig_key *key;
929 	struct hlist_node *pos;
930 	unsigned int size = sizeof(struct in_addr);
931 	struct tcp_md5sig_info *md5sig;
932 
933 	/* caller either holds rcu_read_lock() or socket lock */
934 	md5sig = rcu_dereference_check(tp->md5sig_info,
935 				       sock_owned_by_user(sk) ||
936 				       lockdep_is_held(&sk->sk_lock.slock));
937 	if (!md5sig)
938 		return NULL;
939 #if IS_ENABLED(CONFIG_IPV6)
940 	if (family == AF_INET6)
941 		size = sizeof(struct in6_addr);
942 #endif
943 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
944 		if (key->family != family)
945 			continue;
946 		if (!memcmp(&key->addr, addr, size))
947 			return key;
948 	}
949 	return NULL;
950 }
951 EXPORT_SYMBOL(tcp_md5_do_lookup);
952 
953 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
954 					 struct sock *addr_sk)
955 {
956 	union tcp_md5_addr *addr;
957 
958 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
959 	return tcp_md5_do_lookup(sk, addr, AF_INET);
960 }
961 EXPORT_SYMBOL(tcp_v4_md5_lookup);
962 
963 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
964 						      struct request_sock *req)
965 {
966 	union tcp_md5_addr *addr;
967 
968 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
969 	return tcp_md5_do_lookup(sk, addr, AF_INET);
970 }
971 
972 /* This can be called on a newly created socket, from other files */
973 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
974 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
975 {
976 	/* Add Key to the list */
977 	struct tcp_md5sig_key *key;
978 	struct tcp_sock *tp = tcp_sk(sk);
979 	struct tcp_md5sig_info *md5sig;
980 
981 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
982 	if (key) {
983 		/* Pre-existing entry - just update that one. */
984 		memcpy(key->key, newkey, newkeylen);
985 		key->keylen = newkeylen;
986 		return 0;
987 	}
988 
989 	md5sig = rcu_dereference_protected(tp->md5sig_info,
990 					   sock_owned_by_user(sk));
991 	if (!md5sig) {
992 		md5sig = kmalloc(sizeof(*md5sig), gfp);
993 		if (!md5sig)
994 			return -ENOMEM;
995 
996 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
997 		INIT_HLIST_HEAD(&md5sig->head);
998 		rcu_assign_pointer(tp->md5sig_info, md5sig);
999 	}
1000 
1001 	key = sock_kmalloc(sk, sizeof(*key), gfp);
1002 	if (!key)
1003 		return -ENOMEM;
1004 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1005 		sock_kfree_s(sk, key, sizeof(*key));
1006 		return -ENOMEM;
1007 	}
1008 
1009 	memcpy(key->key, newkey, newkeylen);
1010 	key->keylen = newkeylen;
1011 	key->family = family;
1012 	memcpy(&key->addr, addr,
1013 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1014 				      sizeof(struct in_addr));
1015 	hlist_add_head_rcu(&key->node, &md5sig->head);
1016 	return 0;
1017 }
1018 EXPORT_SYMBOL(tcp_md5_do_add);
1019 
1020 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1021 {
1022 	struct tcp_sock *tp = tcp_sk(sk);
1023 	struct tcp_md5sig_key *key;
1024 	struct tcp_md5sig_info *md5sig;
1025 
1026 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1027 	if (!key)
1028 		return -ENOENT;
1029 	hlist_del_rcu(&key->node);
1030 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1031 	kfree_rcu(key, rcu);
1032 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1033 					   sock_owned_by_user(sk));
1034 	if (hlist_empty(&md5sig->head))
1035 		tcp_free_md5sig_pool();
1036 	return 0;
1037 }
1038 EXPORT_SYMBOL(tcp_md5_do_del);
1039 
1040 void tcp_clear_md5_list(struct sock *sk)
1041 {
1042 	struct tcp_sock *tp = tcp_sk(sk);
1043 	struct tcp_md5sig_key *key;
1044 	struct hlist_node *pos, *n;
1045 	struct tcp_md5sig_info *md5sig;
1046 
1047 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1048 
1049 	if (!hlist_empty(&md5sig->head))
1050 		tcp_free_md5sig_pool();
1051 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1052 		hlist_del_rcu(&key->node);
1053 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1054 		kfree_rcu(key, rcu);
1055 	}
1056 }
1057 
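/* TCP_MD5SIG setsockopt() handler.  From userspace the option is used
 * roughly as follows (illustrative sketch only, not kernel code;
 * peer_sin, secret and fd are placeholder names):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 16 };
 *	memcpy(&md5.tcpm_addr, &peer_sin, sizeof(struct sockaddr_in));
 *	memcpy(md5.tcpm_key, secret, md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key for that peer address.
 */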
1058 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1059 				 int optlen)
1060 {
1061 	struct tcp_md5sig cmd;
1062 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1063 
1064 	if (optlen < sizeof(cmd))
1065 		return -EINVAL;
1066 
1067 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1068 		return -EFAULT;
1069 
1070 	if (sin->sin_family != AF_INET)
1071 		return -EINVAL;
1072 
1073 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1074 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1075 				      AF_INET);
1076 
1077 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1078 		return -EINVAL;
1079 
1080 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1081 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1082 			      GFP_KERNEL);
1083 }
1084 
1085 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1086 					__be32 daddr, __be32 saddr, int nbytes)
1087 {
1088 	struct tcp4_pseudohdr *bp;
1089 	struct scatterlist sg;
1090 
1091 	bp = &hp->md5_blk.ip4;
1092 
1093 	/*
1094 	 * 1. the TCP pseudo-header (in the order: source IP address,
1095 	 * destination IP address, zero-padded protocol number, and
1096 	 * segment length)
1097 	 */
1098 	bp->saddr = saddr;
1099 	bp->daddr = daddr;
1100 	bp->pad = 0;
1101 	bp->protocol = IPPROTO_TCP;
1102 	bp->len = cpu_to_be16(nbytes);
1103 
1104 	sg_init_one(&sg, bp, sizeof(*bp));
1105 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1106 }
1107 
1108 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1109 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1110 {
1111 	struct tcp_md5sig_pool *hp;
1112 	struct hash_desc *desc;
1113 
1114 	hp = tcp_get_md5sig_pool();
1115 	if (!hp)
1116 		goto clear_hash_noput;
1117 	desc = &hp->md5_desc;
1118 
1119 	if (crypto_hash_init(desc))
1120 		goto clear_hash;
1121 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1122 		goto clear_hash;
1123 	if (tcp_md5_hash_header(hp, th))
1124 		goto clear_hash;
1125 	if (tcp_md5_hash_key(hp, key))
1126 		goto clear_hash;
1127 	if (crypto_hash_final(desc, md5_hash))
1128 		goto clear_hash;
1129 
1130 	tcp_put_md5sig_pool();
1131 	return 0;
1132 
1133 clear_hash:
1134 	tcp_put_md5sig_pool();
1135 clear_hash_noput:
1136 	memset(md5_hash, 0, 16);
1137 	return 1;
1138 }
1139 
1140 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1141 			const struct sock *sk, const struct request_sock *req,
1142 			const struct sk_buff *skb)
1143 {
1144 	struct tcp_md5sig_pool *hp;
1145 	struct hash_desc *desc;
1146 	const struct tcphdr *th = tcp_hdr(skb);
1147 	__be32 saddr, daddr;
1148 
1149 	if (sk) {
1150 		saddr = inet_sk(sk)->inet_saddr;
1151 		daddr = inet_sk(sk)->inet_daddr;
1152 	} else if (req) {
1153 		saddr = inet_rsk(req)->loc_addr;
1154 		daddr = inet_rsk(req)->rmt_addr;
1155 	} else {
1156 		const struct iphdr *iph = ip_hdr(skb);
1157 		saddr = iph->saddr;
1158 		daddr = iph->daddr;
1159 	}
1160 
1161 	hp = tcp_get_md5sig_pool();
1162 	if (!hp)
1163 		goto clear_hash_noput;
1164 	desc = &hp->md5_desc;
1165 
1166 	if (crypto_hash_init(desc))
1167 		goto clear_hash;
1168 
1169 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1170 		goto clear_hash;
1171 	if (tcp_md5_hash_header(hp, th))
1172 		goto clear_hash;
1173 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1174 		goto clear_hash;
1175 	if (tcp_md5_hash_key(hp, key))
1176 		goto clear_hash;
1177 	if (crypto_hash_final(desc, md5_hash))
1178 		goto clear_hash;
1179 
1180 	tcp_put_md5sig_pool();
1181 	return 0;
1182 
1183 clear_hash:
1184 	tcp_put_md5sig_pool();
1185 clear_hash_noput:
1186 	memset(md5_hash, 0, 16);
1187 	return 1;
1188 }
1189 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1190 
1191 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1192 {
1193 	/*
1194 	 * This gets called for each TCP segment that arrives
1195 	 * so we want to be efficient.
1196 	 * We have 3 drop cases:
1197 	 * o No MD5 hash and one expected.
1198 	 * o MD5 hash and we're not expecting one.
1199 	 * o MD5 hash and it's wrong.
1200 	 */
1201 	const __u8 *hash_location = NULL;
1202 	struct tcp_md5sig_key *hash_expected;
1203 	const struct iphdr *iph = ip_hdr(skb);
1204 	const struct tcphdr *th = tcp_hdr(skb);
1205 	int genhash;
1206 	unsigned char newhash[16];
1207 
1208 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1209 					  AF_INET);
1210 	hash_location = tcp_parse_md5sig_option(th);
1211 
1212 	/* We've parsed the options - do we have a hash? */
1213 	if (!hash_expected && !hash_location)
1214 		return false;
1215 
1216 	if (hash_expected && !hash_location) {
1217 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1218 		return true;
1219 	}
1220 
1221 	if (!hash_expected && hash_location) {
1222 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1223 		return true;
1224 	}
1225 
1226 	/* Okay, so we have both hash_expected and hash_location -
1227 	 * we need to calculate the checksum.
1228 	 */
1229 	genhash = tcp_v4_md5_hash_skb(newhash,
1230 				      hash_expected,
1231 				      NULL, NULL, skb);
1232 
1233 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1234 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1235 				     &iph->saddr, ntohs(th->source),
1236 				     &iph->daddr, ntohs(th->dest),
1237 				     genhash ? " tcp_v4_calc_md5_hash failed"
1238 				     : "");
1239 		return true;
1240 	}
1241 	return false;
1242 }
1243 
1244 #endif
1245 
1246 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1247 	.family		=	PF_INET,
1248 	.obj_size	=	sizeof(struct tcp_request_sock),
1249 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1250 	.send_ack	=	tcp_v4_reqsk_send_ack,
1251 	.destructor	=	tcp_v4_reqsk_destructor,
1252 	.send_reset	=	tcp_v4_send_reset,
1253 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1254 };
1255 
1256 #ifdef CONFIG_TCP_MD5SIG
1257 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1258 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1259 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1260 };
1261 #endif
1262 
1263 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1264 {
1265 	struct tcp_extend_values tmp_ext;
1266 	struct tcp_options_received tmp_opt;
1267 	const u8 *hash_location;
1268 	struct request_sock *req;
1269 	struct inet_request_sock *ireq;
1270 	struct tcp_sock *tp = tcp_sk(sk);
1271 	struct dst_entry *dst = NULL;
1272 	__be32 saddr = ip_hdr(skb)->saddr;
1273 	__be32 daddr = ip_hdr(skb)->daddr;
1274 	__u32 isn = TCP_SKB_CB(skb)->when;
1275 	bool want_cookie = false;
1276 
1277 	/* Never answer SYNs sent to broadcast or multicast addresses */
1278 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1279 		goto drop;
1280 
1281 	/* TW buckets are converted to open requests without
1282 	 * limitations: they conserve resources and the peer is
1283 	 * evidently a real one.
1284 	 */
1285 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1286 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1287 		if (!want_cookie)
1288 			goto drop;
1289 	}
1290 
1291 	/* The accept backlog is full. If we have already queued enough
1292 	 * warm entries in the syn queue, drop the request. It is better than
1293 	 * clogging the syn queue with openreqs with an exponentially
1294 	 * increasing timeout.
1295 	 */
1296 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1297 		goto drop;
1298 
1299 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1300 	if (!req)
1301 		goto drop;
1302 
1303 #ifdef CONFIG_TCP_MD5SIG
1304 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1305 #endif
1306 
1307 	tcp_clear_options(&tmp_opt);
1308 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1309 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1310 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1311 
1312 	if (tmp_opt.cookie_plus > 0 &&
1313 	    tmp_opt.saw_tstamp &&
1314 	    !tp->rx_opt.cookie_out_never &&
1315 	    (sysctl_tcp_cookie_size > 0 ||
1316 	     (tp->cookie_values != NULL &&
1317 	      tp->cookie_values->cookie_desired > 0))) {
1318 		u8 *c;
1319 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1320 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1321 
1322 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1323 			goto drop_and_release;
1324 
1325 		/* Secret recipe starts with IP addresses */
1326 		*mess++ ^= (__force u32)daddr;
1327 		*mess++ ^= (__force u32)saddr;
1328 
1329 		/* plus variable length Initiator Cookie */
1330 		c = (u8 *)mess;
1331 		while (l-- > 0)
1332 			*c++ ^= *hash_location++;
1333 
1334 		want_cookie = false;	/* not our kind of cookie */
1335 		tmp_ext.cookie_out_never = 0; /* false */
1336 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1337 	} else if (!tp->rx_opt.cookie_in_always) {
1338 		/* redundant indications, but ensure initialization. */
1339 		tmp_ext.cookie_out_never = 1; /* true */
1340 		tmp_ext.cookie_plus = 0;
1341 	} else {
1342 		goto drop_and_release;
1343 	}
1344 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1345 
1346 	if (want_cookie && !tmp_opt.saw_tstamp)
1347 		tcp_clear_options(&tmp_opt);
1348 
1349 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1350 	tcp_openreq_init(req, &tmp_opt, skb);
1351 
1352 	ireq = inet_rsk(req);
1353 	ireq->loc_addr = daddr;
1354 	ireq->rmt_addr = saddr;
1355 	ireq->no_srccheck = inet_sk(sk)->transparent;
1356 	ireq->opt = tcp_v4_save_options(sk, skb);
1357 
1358 	if (security_inet_conn_request(sk, skb, req))
1359 		goto drop_and_free;
1360 
1361 	if (!want_cookie || tmp_opt.tstamp_ok)
1362 		TCP_ECN_create_request(req, skb);
1363 
1364 	if (want_cookie) {
1365 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1366 		req->cookie_ts = tmp_opt.tstamp_ok;
1367 	} else if (!isn) {
1368 		struct flowi4 fl4;
1369 
1370 		/* VJ's idea. We save the last timestamp seen
1371 		 * from the destination in the peer table when entering
1372 		 * TIME-WAIT state, and check against it before
1373 		 * accepting a new connection request.
1374 		 *
1375 		 * If "isn" is not zero, this request hit a live
1376 		 * timewait bucket, so all the necessary checks
1377 		 * are made in the function processing the timewait state.
1378 		 */
1379 		if (tmp_opt.saw_tstamp &&
1380 		    tcp_death_row.sysctl_tw_recycle &&
1381 		    (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
1382 		    fl4.daddr == saddr) {
1383 			if (!tcp_peer_is_proven(req, dst, true)) {
1384 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1385 				goto drop_and_release;
1386 			}
1387 		}
1388 		/* Kill the following clause, if you dislike this way. */
1389 		else if (!sysctl_tcp_syncookies &&
1390 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1391 			  (sysctl_max_syn_backlog >> 2)) &&
1392 			 !tcp_peer_is_proven(req, dst, false)) {
1393 			/* Without syncookies the last quarter of the
1394 			 * backlog is filled with destinations
1395 			 * proven to be alive.
1396 			 * It means that we continue to communicate
1397 			 * only with destinations that were already
1398 			 * remembered by the time the synflood started.
1399 			 */
1400 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1401 				       &saddr, ntohs(tcp_hdr(skb)->source));
1402 			goto drop_and_release;
1403 		}
1404 
1405 		isn = tcp_v4_init_sequence(skb);
1406 	}
1407 	tcp_rsk(req)->snt_isn = isn;
1408 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
1409 
1410 	if (tcp_v4_send_synack(sk, dst, req,
1411 			       (struct request_values *)&tmp_ext,
1412 			       skb_get_queue_mapping(skb),
1413 			       want_cookie) ||
1414 	    want_cookie)
1415 		goto drop_and_free;
1416 
1417 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1418 	return 0;
1419 
1420 drop_and_release:
1421 	dst_release(dst);
1422 drop_and_free:
1423 	reqsk_free(req);
1424 drop:
1425 	return 0;
1426 }
1427 EXPORT_SYMBOL(tcp_v4_conn_request);
1428 
1429 
1430 /*
1431  * The three way handshake has completed - we got a valid ACK -
1432  * now create the new socket.
1433  */
1434 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1435 				  struct request_sock *req,
1436 				  struct dst_entry *dst)
1437 {
1438 	struct inet_request_sock *ireq;
1439 	struct inet_sock *newinet;
1440 	struct tcp_sock *newtp;
1441 	struct sock *newsk;
1442 #ifdef CONFIG_TCP_MD5SIG
1443 	struct tcp_md5sig_key *key;
1444 #endif
1445 	struct ip_options_rcu *inet_opt;
1446 
1447 	if (sk_acceptq_is_full(sk))
1448 		goto exit_overflow;
1449 
1450 	newsk = tcp_create_openreq_child(sk, req, skb);
1451 	if (!newsk)
1452 		goto exit_nonewsk;
1453 
1454 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1455 
1456 	newtp		      = tcp_sk(newsk);
1457 	newinet		      = inet_sk(newsk);
1458 	ireq		      = inet_rsk(req);
1459 	newinet->inet_daddr   = ireq->rmt_addr;
1460 	newinet->inet_rcv_saddr = ireq->loc_addr;
1461 	newinet->inet_saddr	      = ireq->loc_addr;
1462 	inet_opt	      = ireq->opt;
1463 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1464 	ireq->opt	      = NULL;
1465 	newinet->mc_index     = inet_iif(skb);
1466 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1467 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1468 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1469 	if (inet_opt)
1470 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1471 	newinet->inet_id = newtp->write_seq ^ jiffies;
1472 
1473 	if (!dst) {
1474 		dst = inet_csk_route_child_sock(sk, newsk, req);
1475 		if (!dst)
1476 			goto put_and_exit;
1477 	} else {
1478 		/* syncookie case : see end of cookie_v4_check() */
1479 	}
1480 	sk_setup_caps(newsk, dst);
1481 
1482 	tcp_mtup_init(newsk);
1483 	tcp_sync_mss(newsk, dst_mtu(dst));
1484 	newtp->advmss = dst_metric_advmss(dst);
1485 	if (tcp_sk(sk)->rx_opt.user_mss &&
1486 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1487 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1488 
1489 	tcp_initialize_rcv_mss(newsk);
1490 	if (tcp_rsk(req)->snt_synack)
1491 		tcp_valid_rtt_meas(newsk,
1492 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1493 	newtp->total_retrans = req->retrans;
1494 
1495 #ifdef CONFIG_TCP_MD5SIG
1496 	/* Copy over the MD5 key from the original socket */
1497 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1498 				AF_INET);
1499 	if (key != NULL) {
1500 		/*
1501 		 * We're using one, so create a matching key
1502 		 * on the newsk structure. If we fail to get
1503 		 * memory, then we end up not copying the key
1504 		 * across. Shucks.
1505 		 */
1506 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1507 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1508 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1509 	}
1510 #endif
1511 
1512 	if (__inet_inherit_port(sk, newsk) < 0)
1513 		goto put_and_exit;
1514 	__inet_hash_nolisten(newsk, NULL);
1515 
1516 	return newsk;
1517 
1518 exit_overflow:
1519 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1520 exit_nonewsk:
1521 	dst_release(dst);
1522 exit:
1523 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1524 	return NULL;
1525 put_and_exit:
1526 	tcp_clear_xmit_timers(newsk);
1527 	tcp_cleanup_congestion_control(newsk);
1528 	bh_unlock_sock(newsk);
1529 	sock_put(newsk);
1530 	goto exit;
1531 }
1532 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1533 
1534 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1535 {
1536 	struct tcphdr *th = tcp_hdr(skb);
1537 	const struct iphdr *iph = ip_hdr(skb);
1538 	struct sock *nsk;
1539 	struct request_sock **prev;
1540 	/* Find possible connection requests. */
1541 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1542 						       iph->saddr, iph->daddr);
1543 	if (req)
1544 		return tcp_check_req(sk, skb, req, prev);
1545 
1546 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1547 			th->source, iph->daddr, th->dest, inet_iif(skb));
1548 
1549 	if (nsk) {
1550 		if (nsk->sk_state != TCP_TIME_WAIT) {
1551 			bh_lock_sock(nsk);
1552 			return nsk;
1553 		}
1554 		inet_twsk_put(inet_twsk(nsk));
1555 		return NULL;
1556 	}
1557 
1558 #ifdef CONFIG_SYN_COOKIES
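	/* A bare ACK may complete a connection whose SYN we answered with a
	 * syncookie; cookie_v4_check() validates the cookie and recreates
	 * the request if it checks out.
	 */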
1559 	if (!th->syn)
1560 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1561 #endif
1562 	return sk;
1563 }
1564 
1565 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1566 {
1567 	const struct iphdr *iph = ip_hdr(skb);
1568 
1569 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1570 		if (!tcp_v4_check(skb->len, iph->saddr,
1571 				  iph->daddr, skb->csum)) {
1572 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1573 			return 0;
1574 		}
1575 	}
1576 
1577 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1578 				       skb->len, IPPROTO_TCP, 0);
1579 
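	/* For short segments it is cheaper to verify the checksum right away
	 * than to defer it to copy time.
	 */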
1580 	if (skb->len <= 76) {
1581 		return __skb_checksum_complete(skb);
1582 	}
1583 	return 0;
1584 }
1585 
1586 
1587 /* The socket must have its spinlock held when we get
1588  * here.
1589  *
1590  * We have a potential double-lock case here, so even when
1591  * doing backlog processing we use the BH locking scheme.
1592  * This is because we cannot sleep with the original spinlock
1593  * held.
1594  */
1595 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1596 {
1597 	struct sock *rsk;
1598 #ifdef CONFIG_TCP_MD5SIG
1599 	/*
1600 	 * We really want to reject the packet as early as possible
1601 	 * if:
1602 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1603 	 *  o There is an MD5 option and we're not expecting one
1604 	 */
1605 	if (tcp_v4_inbound_md5_hash(sk, skb))
1606 		goto discard;
1607 #endif
1608 
1609 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1610 		sock_rps_save_rxhash(sk, skb);
1611 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1612 			rsk = sk;
1613 			goto reset;
1614 		}
1615 		return 0;
1616 	}
1617 
1618 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1619 		goto csum_err;
1620 
1621 	if (sk->sk_state == TCP_LISTEN) {
1622 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1623 		if (!nsk)
1624 			goto discard;
1625 
1626 		if (nsk != sk) {
1627 			sock_rps_save_rxhash(nsk, skb);
1628 			if (tcp_child_process(sk, nsk, skb)) {
1629 				rsk = nsk;
1630 				goto reset;
1631 			}
1632 			return 0;
1633 		}
1634 	} else
1635 		sock_rps_save_rxhash(sk, skb);
1636 
1637 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1638 		rsk = sk;
1639 		goto reset;
1640 	}
1641 	return 0;
1642 
1643 reset:
1644 	tcp_v4_send_reset(rsk, skb);
1645 discard:
1646 	kfree_skb(skb);
1647 	/* Be careful here. If this function gets more complicated and
1648 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1649 	 * might be destroyed here. This current version compiles correctly,
1650 	 * but you have been warned.
1651 	 */
1652 	return 0;
1653 
1654 csum_err:
1655 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1656 	goto discard;
1657 }
1658 EXPORT_SYMBOL(tcp_v4_do_rcv);
1659 
1660 void tcp_v4_early_demux(struct sk_buff *skb)
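/* Look up the established socket for an incoming segment before the full
 * receive path runs, so that its cached input route can be reused.
 */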
1661 {
1662 	struct net *net = dev_net(skb->dev);
1663 	const struct iphdr *iph;
1664 	const struct tcphdr *th;
1665 	struct net_device *dev;
1666 	struct sock *sk;
1667 
1668 	if (skb->pkt_type != PACKET_HOST)
1669 		return;
1670 
1671 	if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1672 		return;
1673 
1674 	iph = ip_hdr(skb);
1675 	th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1676 
1677 	if (th->doff < sizeof(struct tcphdr) / 4)
1678 		return;
1679 
1680 	if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4))
1681 		return;
1682 
1683 	dev = skb->dev;
1684 	sk = __inet_lookup_established(net, &tcp_hashinfo,
1685 				       iph->saddr, th->source,
1686 				       iph->daddr, ntohs(th->dest),
1687 				       dev->ifindex);
1688 	if (sk) {
1689 		skb->sk = sk;
1690 		skb->destructor = sock_edemux;
1691 		if (sk->sk_state != TCP_TIME_WAIT) {
1692 			struct dst_entry *dst = sk->sk_rx_dst;
1693 			if (dst)
1694 				dst = dst_check(dst, 0);
1695 			if (dst) {
1696 				struct rtable *rt = (struct rtable *) dst;
1697 
1698 				if (rt->rt_iif == dev->ifindex)
1699 					skb_dst_set_noref(skb, dst);
1700 			}
1701 		}
1702 	}
1703 }
1704 
1705 /*
1706  *	From tcp_input.c
1707  */
1708 
1709 int tcp_v4_rcv(struct sk_buff *skb)
1710 {
1711 	const struct iphdr *iph;
1712 	const struct tcphdr *th;
1713 	struct sock *sk;
1714 	int ret;
1715 	struct net *net = dev_net(skb->dev);
1716 
1717 	if (skb->pkt_type != PACKET_HOST)
1718 		goto discard_it;
1719 
1720 	/* Count it even if it's bad */
1721 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1722 
1723 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1724 		goto discard_it;
1725 
1726 	th = tcp_hdr(skb);
1727 
1728 	if (th->doff < sizeof(struct tcphdr) / 4)
1729 		goto bad_packet;
1730 	if (!pskb_may_pull(skb, th->doff * 4))
1731 		goto discard_it;
1732 
1733 	/* An explanation is required here, I think.
1734 	 * Packet length and doff are validated by header prediction,
1735 	 * provided the case of th->doff == 0 is eliminated.
1736 	 * So, we defer the checks. */
1737 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1738 		goto bad_packet;
1739 
1740 	th = tcp_hdr(skb);
1741 	iph = ip_hdr(skb);
1742 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1743 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1744 				    skb->len - th->doff * 4);
1745 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1746 	TCP_SKB_CB(skb)->when	 = 0;
1747 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1748 	TCP_SKB_CB(skb)->sacked	 = 0;
1749 
1750 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1751 	if (!sk)
1752 		goto no_tcp_socket;
1753 
1754 process:
1755 	if (sk->sk_state == TCP_TIME_WAIT)
1756 		goto do_time_wait;
1757 
1758 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1759 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1760 		goto discard_and_relse;
1761 	}
1762 
1763 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1764 		goto discard_and_relse;
1765 	nf_reset(skb);
1766 
1767 	if (sk_filter(sk, skb))
1768 		goto discard_and_relse;
1769 
1770 	skb->dev = NULL;
1771 
1772 	bh_lock_sock_nested(sk);
1773 	ret = 0;
1774 	if (!sock_owned_by_user(sk)) {
1775 #ifdef CONFIG_NET_DMA
1776 		struct tcp_sock *tp = tcp_sk(sk);
1777 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1778 			tp->ucopy.dma_chan = net_dma_find_channel();
1779 		if (tp->ucopy.dma_chan)
1780 			ret = tcp_v4_do_rcv(sk, skb);
1781 		else
1782 #endif
1783 		{
1784 			if (!tcp_prequeue(sk, skb))
1785 				ret = tcp_v4_do_rcv(sk, skb);
1786 		}
1787 	} else if (unlikely(sk_add_backlog(sk, skb,
1788 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1789 		bh_unlock_sock(sk);
1790 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1791 		goto discard_and_relse;
1792 	}
1793 	bh_unlock_sock(sk);
1794 
1795 	sock_put(sk);
1796 
1797 	return ret;
1798 
1799 no_tcp_socket:
1800 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1801 		goto discard_it;
1802 
1803 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1804 bad_packet:
1805 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1806 	} else {
1807 		tcp_v4_send_reset(NULL, skb);
1808 	}
1809 
1810 discard_it:
1811 	/* Discard frame. */
1812 	kfree_skb(skb);
1813 	return 0;
1814 
1815 discard_and_relse:
1816 	sock_put(sk);
1817 	goto discard_it;
1818 
1819 do_time_wait:
1820 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1821 		inet_twsk_put(inet_twsk(sk));
1822 		goto discard_it;
1823 	}
1824 
1825 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1826 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1827 		inet_twsk_put(inet_twsk(sk));
1828 		goto discard_it;
1829 	}
1830 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1831 	case TCP_TW_SYN: {
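		/* A new SYN hit a TIME-WAIT socket.  If a listener exists
		 * for the destination, drop the old timewait bucket and let
		 * the listener process the SYN.
		 */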
1832 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1833 							&tcp_hashinfo,
1834 							iph->daddr, th->dest,
1835 							inet_iif(skb));
1836 		if (sk2) {
1837 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1838 			inet_twsk_put(inet_twsk(sk));
1839 			sk = sk2;
1840 			goto process;
1841 		}
1842 		/* Fall through to ACK */
1843 	}
1844 	case TCP_TW_ACK:
1845 		tcp_v4_timewait_ack(sk, skb);
1846 		break;
1847 	case TCP_TW_RST:
1848 		goto no_tcp_socket;
1849 	case TCP_TW_SUCCESS:;
1850 	}
1851 	goto discard_it;
1852 }
1853 
1854 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1855 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1856 	.twsk_unique	= tcp_twsk_unique,
1857 	.twsk_destructor= tcp_twsk_destructor,
1858 };
1859 
1860 const struct inet_connection_sock_af_ops ipv4_specific = {
1861 	.queue_xmit	   = ip_queue_xmit,
1862 	.send_check	   = tcp_v4_send_check,
1863 	.rebuild_header	   = inet_sk_rebuild_header,
1864 	.conn_request	   = tcp_v4_conn_request,
1865 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1866 	.net_header_len	   = sizeof(struct iphdr),
1867 	.setsockopt	   = ip_setsockopt,
1868 	.getsockopt	   = ip_getsockopt,
1869 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1870 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1871 	.bind_conflict	   = inet_csk_bind_conflict,
1872 #ifdef CONFIG_COMPAT
1873 	.compat_setsockopt = compat_ip_setsockopt,
1874 	.compat_getsockopt = compat_ip_getsockopt,
1875 #endif
1876 };
1877 EXPORT_SYMBOL(ipv4_specific);
1878 
1879 #ifdef CONFIG_TCP_MD5SIG
1880 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1881 	.md5_lookup		= tcp_v4_md5_lookup,
1882 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1883 	.md5_parse		= tcp_v4_parse_md5_keys,
1884 };
1885 #endif
1886 
1887 /* NOTE: A lot of things are set to zero explicitly by the call to
1888  *       sk_alloc(), so they need not be done here.
1889  */
1890 static int tcp_v4_init_sock(struct sock *sk)
1891 {
1892 	struct inet_connection_sock *icsk = inet_csk(sk);
1893 
1894 	tcp_init_sock(sk);
1895 
1896 	icsk->icsk_af_ops = &ipv4_specific;
1897 
1898 #ifdef CONFIG_TCP_MD5SIG
1899 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1900 #endif
1901 
1902 	return 0;
1903 }
1904 
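/* Tear down everything TCP-specific still attached to the socket: timers,
 * congestion control state, queued skbs, MD5 keys, the bind bucket and the
 * cached sendmsg page.  Wired up as tcp_prot.destroy below.
 */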
1905 void tcp_v4_destroy_sock(struct sock *sk)
1906 {
1907 	struct tcp_sock *tp = tcp_sk(sk);
1908 
1909 	tcp_clear_xmit_timers(sk);
1910 
1911 	tcp_cleanup_congestion_control(sk);
1912 
1913 	/* Clean up the write buffer. */
1914 	tcp_write_queue_purge(sk);
1915 
1916 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1917 	__skb_queue_purge(&tp->out_of_order_queue);
1918 
1919 #ifdef CONFIG_TCP_MD5SIG
1920 	/* Clean up the MD5 key list, if any */
1921 	if (tp->md5sig_info) {
1922 		tcp_clear_md5_list(sk);
1923 		kfree_rcu(tp->md5sig_info, rcu);
1924 		tp->md5sig_info = NULL;
1925 	}
1926 #endif
1927 
1928 #ifdef CONFIG_NET_DMA
1929 	/* Cleans up our sk_async_wait_queue */
1930 	__skb_queue_purge(&sk->sk_async_wait_queue);
1931 #endif
1932 
1933 	/* Clean up the prequeue; it really must be empty by now. */
1934 	__skb_queue_purge(&tp->ucopy.prequeue);
1935 
1936 	/* Clean up a referenced TCP bind bucket. */
1937 	if (inet_csk(sk)->icsk_bind_hash)
1938 		inet_put_port(sk);
1939 
1940 	/*
1941 	 * If a cached sendmsg page exists, toss it.
1942 	 */
1943 	if (sk->sk_sndmsg_page) {
1944 		__free_page(sk->sk_sndmsg_page);
1945 		sk->sk_sndmsg_page = NULL;
1946 	}
1947 
1948 	/* TCP Cookie Transactions */
1949 	if (tp->cookie_values != NULL) {
1950 		kref_put(&tp->cookie_values->kref,
1951 			 tcp_cookie_values_release);
1952 		tp->cookie_values = NULL;
1953 	}
1954 
1955 	sk_sockets_allocated_dec(sk);
1956 	sock_release_memcg(sk);
1957 }
1958 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1959 
1960 #ifdef CONFIG_PROC_FS
1961 /* Proc filesystem TCP sock list dumping. */
1962 
1963 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1964 {
1965 	return hlist_nulls_empty(head) ? NULL :
1966 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1967 }
1968 
1969 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1970 {
1971 	return !is_a_nulls(tw->tw_node.next) ?
1972 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1973 }
1974 
1975 /*
1976  * Get the next listening socket following cur.  If cur is NULL, get the
1977  * first socket starting from the bucket given in st->bucket; when
1978  * st->bucket is zero, the very first socket in the hash table is returned.
1979  */
1980 static void *listening_get_next(struct seq_file *seq, void *cur)
1981 {
1982 	struct inet_connection_sock *icsk;
1983 	struct hlist_nulls_node *node;
1984 	struct sock *sk = cur;
1985 	struct inet_listen_hashbucket *ilb;
1986 	struct tcp_iter_state *st = seq->private;
1987 	struct net *net = seq_file_net(seq);
1988 
1989 	if (!sk) {
1990 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1991 		spin_lock_bh(&ilb->lock);
1992 		sk = sk_nulls_head(&ilb->head);
1993 		st->offset = 0;
1994 		goto get_sk;
1995 	}
1996 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1997 	++st->num;
1998 	++st->offset;
1999 
2000 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2001 		struct request_sock *req = cur;
2002 
2003 		icsk = inet_csk(st->syn_wait_sk);
2004 		req = req->dl_next;
2005 		while (1) {
2006 			while (req) {
2007 				if (req->rsk_ops->family == st->family) {
2008 					cur = req;
2009 					goto out;
2010 				}
2011 				req = req->dl_next;
2012 			}
2013 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2014 				break;
2015 get_req:
2016 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2017 		}
2018 		sk	  = sk_nulls_next(st->syn_wait_sk);
2019 		st->state = TCP_SEQ_STATE_LISTENING;
2020 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021 	} else {
2022 		icsk = inet_csk(sk);
2023 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2024 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2025 			goto start_req;
2026 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2027 		sk = sk_nulls_next(sk);
2028 	}
2029 get_sk:
2030 	sk_nulls_for_each_from(sk, node) {
2031 		if (!net_eq(sock_net(sk), net))
2032 			continue;
2033 		if (sk->sk_family == st->family) {
2034 			cur = sk;
2035 			goto out;
2036 		}
2037 		icsk = inet_csk(sk);
2038 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2039 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2040 start_req:
2041 			st->uid		= sock_i_uid(sk);
2042 			st->syn_wait_sk = sk;
2043 			st->state	= TCP_SEQ_STATE_OPENREQ;
2044 			st->sbucket	= 0;
2045 			goto get_req;
2046 		}
2047 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2048 	}
2049 	spin_unlock_bh(&ilb->lock);
2050 	st->offset = 0;
2051 	if (++st->bucket < INET_LHTABLE_SIZE) {
2052 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2053 		spin_lock_bh(&ilb->lock);
2054 		sk = sk_nulls_head(&ilb->head);
2055 		goto get_sk;
2056 	}
2057 	cur = NULL;
2058 out:
2059 	return cur;
2060 }
2061 
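/* Skip forward *pos listening sockets from the start of the hash,
 * decrementing *pos as entries are consumed; returns the socket at that
 * position, or NULL if the listening hash runs out first.
 */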
2062 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2063 {
2064 	struct tcp_iter_state *st = seq->private;
2065 	void *rc;
2066 
2067 	st->bucket = 0;
2068 	st->offset = 0;
2069 	rc = listening_get_next(seq, NULL);
2070 
2071 	while (rc && *pos) {
2072 		rc = listening_get_next(seq, rc);
2073 		--*pos;
2074 	}
2075 	return rc;
2076 }
2077 
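/* True when the established-hash bucket selected by st->bucket holds
 * neither established nor TIME_WAIT sockets.
 */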
2078 static inline bool empty_bucket(struct tcp_iter_state *st)
2079 {
2080 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2081 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2082 }
2083 
2084 /*
2085  * Get the first established socket, starting from the bucket given in st->bucket.
2086  * If st->bucket is zero, the very first socket in the hash is returned.
2087  */
2088 static void *established_get_first(struct seq_file *seq)
2089 {
2090 	struct tcp_iter_state *st = seq->private;
2091 	struct net *net = seq_file_net(seq);
2092 	void *rc = NULL;
2093 
2094 	st->offset = 0;
2095 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2096 		struct sock *sk;
2097 		struct hlist_nulls_node *node;
2098 		struct inet_timewait_sock *tw;
2099 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2100 
2101 		/* Lockless fast path for the common case of empty buckets */
2102 		if (empty_bucket(st))
2103 			continue;
2104 
2105 		spin_lock_bh(lock);
2106 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2107 			if (sk->sk_family != st->family ||
2108 			    !net_eq(sock_net(sk), net)) {
2109 				continue;
2110 			}
2111 			rc = sk;
2112 			goto out;
2113 		}
2114 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2115 		inet_twsk_for_each(tw, node,
2116 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2117 			if (tw->tw_family != st->family ||
2118 			    !net_eq(twsk_net(tw), net)) {
2119 				continue;
2120 			}
2121 			rc = tw;
2122 			goto out;
2123 		}
2124 		spin_unlock_bh(lock);
2125 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2126 	}
2127 out:
2128 	return rc;
2129 }
2130 
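/* Advance to the next established or TIME_WAIT socket that matches
 * st->family and the current netns, falling back from the established
 * chain to the bucket's TIME_WAIT chain.  When a bucket is exhausted its
 * lock is dropped and the next non-empty bucket is locked instead.
 */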
2131 static void *established_get_next(struct seq_file *seq, void *cur)
2132 {
2133 	struct sock *sk = cur;
2134 	struct inet_timewait_sock *tw;
2135 	struct hlist_nulls_node *node;
2136 	struct tcp_iter_state *st = seq->private;
2137 	struct net *net = seq_file_net(seq);
2138 
2139 	++st->num;
2140 	++st->offset;
2141 
2142 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2143 		tw = cur;
2144 		tw = tw_next(tw);
2145 get_tw:
2146 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2147 			tw = tw_next(tw);
2148 		}
2149 		if (tw) {
2150 			cur = tw;
2151 			goto out;
2152 		}
2153 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2154 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2155 
2156 		/* Look for the next non-empty bucket */
2157 		st->offset = 0;
2158 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2159 				empty_bucket(st))
2160 			;
2161 		if (st->bucket > tcp_hashinfo.ehash_mask)
2162 			return NULL;
2163 
2164 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2165 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2166 	} else
2167 		sk = sk_nulls_next(sk);
2168 
2169 	sk_nulls_for_each_from(sk, node) {
2170 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2171 			goto found;
2172 	}
2173 
2174 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2175 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2176 	goto get_tw;
2177 found:
2178 	cur = sk;
2179 out:
2180 	return cur;
2181 }
2182 
2183 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2184 {
2185 	struct tcp_iter_state *st = seq->private;
2186 	void *rc;
2187 
2188 	st->bucket = 0;
2189 	rc = established_get_first(seq);
2190 
2191 	while (rc && pos) {
2192 		rc = established_get_next(seq, rc);
2193 		--pos;
2194 	}
2195 	return rc;
2196 }
2197 
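/* Position the iterator at entry pos, looking first in the listening
 * hash and then, if needed, in the established/TIME_WAIT hash.
 */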
2198 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2199 {
2200 	void *rc;
2201 	struct tcp_iter_state *st = seq->private;
2202 
2203 	st->state = TCP_SEQ_STATE_LISTENING;
2204 	rc	  = listening_get_idx(seq, &pos);
2205 
2206 	if (!rc) {
2207 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2208 		rc	  = established_get_idx(seq, pos);
2209 	}
2210 
2211 	return rc;
2212 }
2213 
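/* Try to resume the walk at the bucket and in-bucket offset cached in st,
 * so a continued read does not rescan the tables from the beginning.
 * st->num is saved and restored so the printed slot numbers stay stable.
 */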
2214 static void *tcp_seek_last_pos(struct seq_file *seq)
2215 {
2216 	struct tcp_iter_state *st = seq->private;
2217 	int offset = st->offset;
2218 	int orig_num = st->num;
2219 	void *rc = NULL;
2220 
2221 	switch (st->state) {
2222 	case TCP_SEQ_STATE_OPENREQ:
2223 	case TCP_SEQ_STATE_LISTENING:
2224 		if (st->bucket >= INET_LHTABLE_SIZE)
2225 			break;
2226 		st->state = TCP_SEQ_STATE_LISTENING;
2227 		rc = listening_get_next(seq, NULL);
2228 		while (offset-- && rc)
2229 			rc = listening_get_next(seq, rc);
2230 		if (rc)
2231 			break;
2232 		st->bucket = 0;
2233 		/* Fallthrough */
2234 	case TCP_SEQ_STATE_ESTABLISHED:
2235 	case TCP_SEQ_STATE_TIME_WAIT:
2236 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2237 		if (st->bucket > tcp_hashinfo.ehash_mask)
2238 			break;
2239 		rc = established_get_first(seq);
2240 		while (offset-- && rc)
2241 			rc = established_get_next(seq, rc);
2242 	}
2243 
2244 	st->num = orig_num;
2245 
2246 	return rc;
2247 }
2248 
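/* seq_file ->start: when *pos matches the cached st->last_pos, resume via
 * tcp_seek_last_pos(); otherwise reset the iterator state and walk from
 * the beginning (SEQ_START_TOKEN is returned for position zero).
 */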
2249 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2250 {
2251 	struct tcp_iter_state *st = seq->private;
2252 	void *rc;
2253 
2254 	if (*pos && *pos == st->last_pos) {
2255 		rc = tcp_seek_last_pos(seq);
2256 		if (rc)
2257 			goto out;
2258 	}
2259 
2260 	st->state = TCP_SEQ_STATE_LISTENING;
2261 	st->num = 0;
2262 	st->bucket = 0;
2263 	st->offset = 0;
2264 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2265 
2266 out:
2267 	st->last_pos = *pos;
2268 	return rc;
2269 }
2270 
2271 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2272 {
2273 	struct tcp_iter_state *st = seq->private;
2274 	void *rc = NULL;
2275 
2276 	if (v == SEQ_START_TOKEN) {
2277 		rc = tcp_get_idx(seq, 0);
2278 		goto out;
2279 	}
2280 
2281 	switch (st->state) {
2282 	case TCP_SEQ_STATE_OPENREQ:
2283 	case TCP_SEQ_STATE_LISTENING:
2284 		rc = listening_get_next(seq, v);
2285 		if (!rc) {
2286 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2287 			st->bucket = 0;
2288 			st->offset = 0;
2289 			rc	  = established_get_first(seq);
2290 		}
2291 		break;
2292 	case TCP_SEQ_STATE_ESTABLISHED:
2293 	case TCP_SEQ_STATE_TIME_WAIT:
2294 		rc = established_get_next(seq, v);
2295 		break;
2296 	}
2297 out:
2298 	++*pos;
2299 	st->last_pos = *pos;
2300 	return rc;
2301 }
2302 
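/* seq_file ->stop: release whichever lock the iterator still holds,
 * depending on the state it stopped in.
 */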
2303 static void tcp_seq_stop(struct seq_file *seq, void *v)
2304 {
2305 	struct tcp_iter_state *st = seq->private;
2306 
2307 	switch (st->state) {
2308 	case TCP_SEQ_STATE_OPENREQ:
2309 		if (v) {
2310 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2311 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2312 		}
2313 	case TCP_SEQ_STATE_LISTENING:
2314 		if (v != SEQ_START_TOKEN)
2315 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2316 		break;
2317 	case TCP_SEQ_STATE_TIME_WAIT:
2318 	case TCP_SEQ_STATE_ESTABLISHED:
2319 		if (v)
2320 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2321 		break;
2322 	}
2323 }
2324 
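/* Shared ->open for the per-family /proc tcp files: allocate the iterator
 * state and record the address family from the afinfo stored in the proc
 * directory entry.
 */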
2325 int tcp_seq_open(struct inode *inode, struct file *file)
2326 {
2327 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2328 	struct tcp_iter_state *s;
2329 	int err;
2330 
2331 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2332 			  sizeof(struct tcp_iter_state));
2333 	if (err < 0)
2334 		return err;
2335 
2336 	s = ((struct seq_file *)file->private_data)->private;
2337 	s->family		= afinfo->family;
2338 	s->last_pos 		= 0;
2339 	return 0;
2340 }
2341 EXPORT_SYMBOL(tcp_seq_open);
2342 
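/* Plug the family-specific afinfo into the common start/next/stop
 * iterator callbacks and create its entry under /proc/net.
 */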
2343 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2344 {
2345 	int rc = 0;
2346 	struct proc_dir_entry *p;
2347 
2348 	afinfo->seq_ops.start		= tcp_seq_start;
2349 	afinfo->seq_ops.next		= tcp_seq_next;
2350 	afinfo->seq_ops.stop		= tcp_seq_stop;
2351 
2352 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2353 			     afinfo->seq_fops, afinfo);
2354 	if (!p)
2355 		rc = -ENOMEM;
2356 	return rc;
2357 }
2358 EXPORT_SYMBOL(tcp_proc_register);
2359 
2360 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2361 {
2362 	proc_net_remove(net, afinfo->name);
2363 }
2364 EXPORT_SYMBOL(tcp_proc_unregister);
2365 
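/* Format one SYN_RECV request socket as a /proc/net/tcp line. */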
2366 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2367 			 struct seq_file *f, int i, int uid, int *len)
2368 {
2369 	const struct inet_request_sock *ireq = inet_rsk(req);
2370 	int ttd = req->expires - jiffies;
2371 
2372 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2373 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2374 		i,
2375 		ireq->loc_addr,
2376 		ntohs(inet_sk(sk)->inet_sport),
2377 		ireq->rmt_addr,
2378 		ntohs(ireq->rmt_port),
2379 		TCP_SYN_RECV,
2380 		0, 0, /* could print option size, but that is af dependent. */
2381 		1,    /* timers active (only the expire timer) */
2382 		jiffies_to_clock_t(ttd),
2383 		req->retrans,
2384 		uid,
2385 		0,  /* non standard timer */
2386 		0, /* open_requests have no inode */
2387 		atomic_read(&sk->sk_refcnt),
2388 		req,
2389 		len);
2390 }
2391 
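/* Format one listening or established IPv4 socket as a /proc/net/tcp line. */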
2392 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2393 {
2394 	int timer_active;
2395 	unsigned long timer_expires;
2396 	const struct tcp_sock *tp = tcp_sk(sk);
2397 	const struct inet_connection_sock *icsk = inet_csk(sk);
2398 	const struct inet_sock *inet = inet_sk(sk);
2399 	__be32 dest = inet->inet_daddr;
2400 	__be32 src = inet->inet_rcv_saddr;
2401 	__u16 destp = ntohs(inet->inet_dport);
2402 	__u16 srcp = ntohs(inet->inet_sport);
2403 	int rx_queue;
2404 
2405 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2406 		timer_active	= 1;
2407 		timer_expires	= icsk->icsk_timeout;
2408 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2409 		timer_active	= 4;
2410 		timer_expires	= icsk->icsk_timeout;
2411 	} else if (timer_pending(&sk->sk_timer)) {
2412 		timer_active	= 2;
2413 		timer_expires	= sk->sk_timer.expires;
2414 	} else {
2415 		timer_active	= 0;
2416 		timer_expires = jiffies;
2417 	}
2418 
2419 	if (sk->sk_state == TCP_LISTEN)
2420 		rx_queue = sk->sk_ack_backlog;
2421 	else
2422 		/*
2423 		 * Because we don't lock the socket, we might find a transient negative value.
2424 		 */
2425 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2426 
2427 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2428 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2429 		i, src, srcp, dest, destp, sk->sk_state,
2430 		tp->write_seq - tp->snd_una,
2431 		rx_queue,
2432 		timer_active,
2433 		jiffies_to_clock_t(timer_expires - jiffies),
2434 		icsk->icsk_retransmits,
2435 		sock_i_uid(sk),
2436 		icsk->icsk_probes_out,
2437 		sock_i_ino(sk),
2438 		atomic_read(&sk->sk_refcnt), sk,
2439 		jiffies_to_clock_t(icsk->icsk_rto),
2440 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2441 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2442 		tp->snd_cwnd,
2443 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2444 		len);
2445 }
2446 
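/* Format one TIME_WAIT socket as a /proc/net/tcp line. */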
2447 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2448 			       struct seq_file *f, int i, int *len)
2449 {
2450 	__be32 dest, src;
2451 	__u16 destp, srcp;
2452 	int ttd = tw->tw_ttd - jiffies;
2453 
2454 	if (ttd < 0)
2455 		ttd = 0;
2456 
2457 	dest  = tw->tw_daddr;
2458 	src   = tw->tw_rcv_saddr;
2459 	destp = ntohs(tw->tw_dport);
2460 	srcp  = ntohs(tw->tw_sport);
2461 
2462 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2463 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2464 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2465 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2466 		atomic_read(&tw->tw_refcnt), tw, len);
2467 }
2468 
2469 #define TMPSZ 150
2470 
2471 static int tcp4_seq_show(struct seq_file *seq, void *v)
2472 {
2473 	struct tcp_iter_state *st;
2474 	int len;
2475 
2476 	if (v == SEQ_START_TOKEN) {
2477 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2478 			   "  sl  local_address rem_address   st tx_queue "
2479 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2480 			   "inode");
2481 		goto out;
2482 	}
2483 	st = seq->private;
2484 
2485 	switch (st->state) {
2486 	case TCP_SEQ_STATE_LISTENING:
2487 	case TCP_SEQ_STATE_ESTABLISHED:
2488 		get_tcp4_sock(v, seq, st->num, &len);
2489 		break;
2490 	case TCP_SEQ_STATE_OPENREQ:
2491 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2492 		break;
2493 	case TCP_SEQ_STATE_TIME_WAIT:
2494 		get_timewait4_sock(v, seq, st->num, &len);
2495 		break;
2496 	}
2497 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2498 out:
2499 	return 0;
2500 }
2501 
2502 static const struct file_operations tcp_afinfo_seq_fops = {
2503 	.owner   = THIS_MODULE,
2504 	.open    = tcp_seq_open,
2505 	.read    = seq_read,
2506 	.llseek  = seq_lseek,
2507 	.release = seq_release_net
2508 };
2509 
2510 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2511 	.name		= "tcp",
2512 	.family		= AF_INET,
2513 	.seq_fops	= &tcp_afinfo_seq_fops,
2514 	.seq_ops	= {
2515 		.show		= tcp4_seq_show,
2516 	},
2517 };
2518 
2519 static int __net_init tcp4_proc_init_net(struct net *net)
2520 {
2521 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2522 }
2523 
2524 static void __net_exit tcp4_proc_exit_net(struct net *net)
2525 {
2526 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2527 }
2528 
2529 static struct pernet_operations tcp4_net_ops = {
2530 	.init = tcp4_proc_init_net,
2531 	.exit = tcp4_proc_exit_net,
2532 };
2533 
2534 int __init tcp4_proc_init(void)
2535 {
2536 	return register_pernet_subsys(&tcp4_net_ops);
2537 }
2538 
2539 void tcp4_proc_exit(void)
2540 {
2541 	unregister_pernet_subsys(&tcp4_net_ops);
2542 }
2543 #endif /* CONFIG_PROC_FS */
2544 
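/* GRO receive hook for TCP over IPv4: verify the checksum against the
 * IPv4 pseudo-header (or flush the flow when none is available) before
 * handing the segment to the generic tcp_gro_receive().
 */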
2545 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2546 {
2547 	const struct iphdr *iph = skb_gro_network_header(skb);
2548 
2549 	switch (skb->ip_summed) {
2550 	case CHECKSUM_COMPLETE:
2551 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2552 				  skb->csum)) {
2553 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2554 			break;
2555 		}
2556 
2557 		/* fall through */
2558 	case CHECKSUM_NONE:
2559 		NAPI_GRO_CB(skb)->flush = 1;
2560 		return NULL;
2561 	}
2562 
2563 	return tcp_gro_receive(head, skb);
2564 }
2565 
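/* GRO complete hook: seed th->check with the pseudo-header checksum for
 * the merged segment, mark it SKB_GSO_TCPV4 and let tcp_gro_complete()
 * finish the job.
 */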
2566 int tcp4_gro_complete(struct sk_buff *skb)
2567 {
2568 	const struct iphdr *iph = ip_hdr(skb);
2569 	struct tcphdr *th = tcp_hdr(skb);
2570 
2571 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2572 				  iph->saddr, iph->daddr, 0);
2573 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2574 
2575 	return tcp_gro_complete(skb);
2576 }
2577 
2578 struct proto tcp_prot = {
2579 	.name			= "TCP",
2580 	.owner			= THIS_MODULE,
2581 	.close			= tcp_close,
2582 	.connect		= tcp_v4_connect,
2583 	.disconnect		= tcp_disconnect,
2584 	.accept			= inet_csk_accept,
2585 	.ioctl			= tcp_ioctl,
2586 	.init			= tcp_v4_init_sock,
2587 	.destroy		= tcp_v4_destroy_sock,
2588 	.shutdown		= tcp_shutdown,
2589 	.setsockopt		= tcp_setsockopt,
2590 	.getsockopt		= tcp_getsockopt,
2591 	.recvmsg		= tcp_recvmsg,
2592 	.sendmsg		= tcp_sendmsg,
2593 	.sendpage		= tcp_sendpage,
2594 	.backlog_rcv		= tcp_v4_do_rcv,
2595 	.release_cb		= tcp_release_cb,
2596 	.hash			= inet_hash,
2597 	.unhash			= inet_unhash,
2598 	.get_port		= inet_csk_get_port,
2599 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2600 	.sockets_allocated	= &tcp_sockets_allocated,
2601 	.orphan_count		= &tcp_orphan_count,
2602 	.memory_allocated	= &tcp_memory_allocated,
2603 	.memory_pressure	= &tcp_memory_pressure,
2604 	.sysctl_wmem		= sysctl_tcp_wmem,
2605 	.sysctl_rmem		= sysctl_tcp_rmem,
2606 	.max_header		= MAX_TCP_HEADER,
2607 	.obj_size		= sizeof(struct tcp_sock),
2608 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2609 	.twsk_prot		= &tcp_timewait_sock_ops,
2610 	.rsk_prot		= &tcp_request_sock_ops,
2611 	.h.hashinfo		= &tcp_hashinfo,
2612 	.no_autobind		= true,
2613 #ifdef CONFIG_COMPAT
2614 	.compat_setsockopt	= compat_tcp_setsockopt,
2615 	.compat_getsockopt	= compat_tcp_getsockopt,
2616 #endif
2617 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2618 	.init_cgroup		= tcp_init_cgroup,
2619 	.destroy_cgroup		= tcp_destroy_cgroup,
2620 	.proto_cgroup		= tcp_proto_cgroup,
2621 #endif
2622 };
2623 EXPORT_SYMBOL(tcp_prot);
2624 
2625 static int __net_init tcp_sk_init(struct net *net)
2626 {
2627 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2628 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2629 }
2630 
2631 static void __net_exit tcp_sk_exit(struct net *net)
2632 {
2633 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2634 }
2635 
2636 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2637 {
2638 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2639 }
2640 
2641 static struct pernet_operations __net_initdata tcp_sk_ops = {
2642        .init	   = tcp_sk_init,
2643        .exit	   = tcp_sk_exit,
2644        .exit_batch = tcp_sk_exit_batch,
2645 };
2646 
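/* Boot-time TCP/IPv4 setup: initialise the global hash tables and create
 * the per-namespace control sockets; failure here is fatal.
 */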
2647 void __init tcp_v4_init(void)
2648 {
2649 	inet_hashinfo_init(&tcp_hashinfo);
2650 	if (register_pernet_subsys(&tcp_sk_ops))
2651 		panic("Failed to create the TCP control socket.\n");
2652 }
2653