xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision e1f7c9ee)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after a
45  *					year-long coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/secure_seq.h>
76 #include <net/tcp_memcontrol.h>
77 #include <net/busy_poll.h>
78 
79 #include <linux/inet.h>
80 #include <linux/ipv6.h>
81 #include <linux/stddef.h>
82 #include <linux/proc_fs.h>
83 #include <linux/seq_file.h>
84 
85 #include <linux/crypto.h>
86 #include <linux/scatterlist.h>
87 
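/* These tunables correspond to the net.ipv4.tcp_tw_reuse and
 * net.ipv4.tcp_low_latency sysctls; their registration lives elsewhere,
 * in the ipv4 sysctl table.
 */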
88 int sysctl_tcp_tw_reuse __read_mostly;
89 int sysctl_tcp_low_latency __read_mostly;
90 EXPORT_SYMBOL(sysctl_tcp_low_latency);
91 
92 #ifdef CONFIG_TCP_MD5SIG
93 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
94 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
95 #endif
96 
97 struct inet_hashinfo tcp_hashinfo;
98 EXPORT_SYMBOL(tcp_hashinfo);
99 
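/* Generate the initial sequence number for an outgoing connection from the
 * address/port 4-tuple via a keyed hash, making ISNs hard to predict.
 */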
100 static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
101 {
102 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
103 					  ip_hdr(skb)->saddr,
104 					  tcp_hdr(skb)->dest,
105 					  tcp_hdr(skb)->source);
106 }
107 
108 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
109 {
110 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
111 	struct tcp_sock *tp = tcp_sk(sk);
112 
113 	/* With PAWS, it is safe from the viewpoint
114 	   of data integrity. Even without PAWS it is safe provided sequence
115 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
116 
117 	   Actually, the idea is close to VJ's: only the timestamp cache is
118 	   held not per host but per port pair, and the TW bucket is used as
119 	   the state holder.
120 
121 	   If the TW bucket has already been destroyed we fall back to VJ's scheme
122 	   and use the initial timestamp retrieved from the peer table.
123 	 */
124 	if (tcptw->tw_ts_recent_stamp &&
125 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
126 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
127 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
128 		if (tp->write_seq == 0)
129 			tp->write_seq = 1;
130 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
131 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
132 		sock_hold(sktw);
133 		return 1;
134 	}
135 
136 	return 0;
137 }
138 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
139 
140 /* This will initiate an outgoing connection. */
141 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142 {
143 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
144 	struct inet_sock *inet = inet_sk(sk);
145 	struct tcp_sock *tp = tcp_sk(sk);
146 	__be16 orig_sport, orig_dport;
147 	__be32 daddr, nexthop;
148 	struct flowi4 *fl4;
149 	struct rtable *rt;
150 	int err;
151 	struct ip_options_rcu *inet_opt;
152 
153 	if (addr_len < sizeof(struct sockaddr_in))
154 		return -EINVAL;
155 
156 	if (usin->sin_family != AF_INET)
157 		return -EAFNOSUPPORT;
158 
159 	nexthop = daddr = usin->sin_addr.s_addr;
160 	inet_opt = rcu_dereference_protected(inet->inet_opt,
161 					     sock_owned_by_user(sk));
162 	if (inet_opt && inet_opt->opt.srr) {
163 		if (!daddr)
164 			return -EINVAL;
165 		nexthop = inet_opt->opt.faddr;
166 	}
167 
168 	orig_sport = inet->inet_sport;
169 	orig_dport = usin->sin_port;
170 	fl4 = &inet->cork.fl.u.ip4;
171 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173 			      IPPROTO_TCP,
174 			      orig_sport, orig_dport, sk);
175 	if (IS_ERR(rt)) {
176 		err = PTR_ERR(rt);
177 		if (err == -ENETUNREACH)
178 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179 		return err;
180 	}
181 
182 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
183 		ip_rt_put(rt);
184 		return -ENETUNREACH;
185 	}
186 
187 	if (!inet_opt || !inet_opt->opt.srr)
188 		daddr = fl4->daddr;
189 
190 	if (!inet->inet_saddr)
191 		inet->inet_saddr = fl4->saddr;
192 	inet->inet_rcv_saddr = inet->inet_saddr;
193 
194 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
195 		/* Reset inherited state */
196 		tp->rx_opt.ts_recent	   = 0;
197 		tp->rx_opt.ts_recent_stamp = 0;
198 		if (likely(!tp->repair))
199 			tp->write_seq	   = 0;
200 	}
201 
202 	if (tcp_death_row.sysctl_tw_recycle &&
203 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
204 		tcp_fetch_timewait_stamp(sk, &rt->dst);
205 
206 	inet->inet_dport = usin->sin_port;
207 	inet->inet_daddr = daddr;
208 
209 	inet_csk(sk)->icsk_ext_hdr_len = 0;
210 	if (inet_opt)
211 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
212 
213 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
214 
215 	/* Socket identity is still unknown (sport may be zero).
216 	 * However we set state to SYN-SENT and, without releasing the socket
217 	 * lock, select a source port, enter ourselves into the hash tables and
218 	 * complete initialization after this.
219 	 */
220 	tcp_set_state(sk, TCP_SYN_SENT);
221 	err = inet_hash_connect(&tcp_death_row, sk);
222 	if (err)
223 		goto failure;
224 
225 	inet_set_txhash(sk);
226 
227 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228 			       inet->inet_sport, inet->inet_dport, sk);
229 	if (IS_ERR(rt)) {
230 		err = PTR_ERR(rt);
231 		rt = NULL;
232 		goto failure;
233 	}
234 	/* OK, now commit destination to socket.  */
235 	sk->sk_gso_type = SKB_GSO_TCPV4;
236 	sk_setup_caps(sk, &rt->dst);
237 
238 	if (!tp->write_seq && likely(!tp->repair))
239 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240 							   inet->inet_daddr,
241 							   inet->inet_sport,
242 							   usin->sin_port);
243 
244 	inet->inet_id = tp->write_seq ^ jiffies;
245 
246 	err = tcp_connect(sk);
247 
248 	rt = NULL;
249 	if (err)
250 		goto failure;
251 
252 	return 0;
253 
254 failure:
255 	/*
256 	 * This unhashes the socket and releases the local port,
257 	 * if necessary.
258 	 */
259 	tcp_set_state(sk, TCP_CLOSE);
260 	ip_rt_put(rt);
261 	sk->sk_route_caps = 0;
262 	inet->inet_dport = 0;
263 	return err;
264 }
265 EXPORT_SYMBOL(tcp_v4_connect);
266 
267 /*
268  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
269  * It can be called through tcp_release_cb() if socket was owned by user
270  * at the time tcp_v4_err() was called to handle ICMP message.
271  */
272 void tcp_v4_mtu_reduced(struct sock *sk)
273 {
274 	struct dst_entry *dst;
275 	struct inet_sock *inet = inet_sk(sk);
276 	u32 mtu = tcp_sk(sk)->mtu_info;
277 
278 	dst = inet_csk_update_pmtu(sk, mtu);
279 	if (!dst)
280 		return;
281 
282 	/* Something is about to go wrong... Remember the soft error
283 	 * in case this connection is not able to recover.
284 	 */
285 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
286 		sk->sk_err_soft = EMSGSIZE;
287 
288 	mtu = dst_mtu(dst);
289 
290 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
291 	    ip_sk_accept_pmtu(sk) &&
292 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
293 		tcp_sync_mss(sk, mtu);
294 
295 		/* Resend the TCP packet because it's
296 		 * clear that the old packet has been
297 		 * dropped. This is the new "fast" path mtu
298 		 * discovery.
299 		 */
300 		tcp_simple_retransmit(sk);
301 	} /* else let the usual retransmit timer handle it */
302 }
303 EXPORT_SYMBOL(tcp_v4_mtu_reduced);
304 
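/* Handle an ICMP redirect: hand it to the route currently cached on the
 * socket, if any, so the routing layer can update the next hop.
 */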
305 static void do_redirect(struct sk_buff *skb, struct sock *sk)
306 {
307 	struct dst_entry *dst = __sk_dst_check(sk, 0);
308 
309 	if (dst)
310 		dst->ops->redirect(dst, sk, skb);
311 }
312 
313 /*
314  * This routine is called by the ICMP module when it gets some
315  * sort of error condition.  If err < 0 then the socket should
316  * be closed and the error returned to the user.  If err > 0
317  * it's just the icmp type << 8 | icmp code.  After adjustment
318  * header points to the first 8 bytes of the tcp header.  We need
319  * to find the appropriate port.
320  *
321  * The locking strategy used here is very "optimistic". When
322  * someone else accesses the socket the ICMP is just dropped
323  * and for some paths there is no check at all.
324  * A more general error queue to queue errors for later handling
325  * is probably better.
326  *
327  */
328 
329 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
330 {
331 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
332 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
333 	struct inet_connection_sock *icsk;
334 	struct tcp_sock *tp;
335 	struct inet_sock *inet;
336 	const int type = icmp_hdr(icmp_skb)->type;
337 	const int code = icmp_hdr(icmp_skb)->code;
338 	struct sock *sk;
339 	struct sk_buff *skb;
340 	struct request_sock *fastopen;
341 	__u32 seq, snd_una;
342 	__u32 remaining;
343 	int err;
344 	struct net *net = dev_net(icmp_skb->dev);
345 
346 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
347 			iph->saddr, th->source, inet_iif(icmp_skb));
348 	if (!sk) {
349 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
350 		return;
351 	}
352 	if (sk->sk_state == TCP_TIME_WAIT) {
353 		inet_twsk_put(inet_twsk(sk));
354 		return;
355 	}
356 
357 	bh_lock_sock(sk);
358 	/* If too many ICMPs get dropped on busy
359 	 * servers this needs to be solved differently.
360 	 * We do take care of the PMTU discovery (RFC 1191) special case:
361 	 * we can receive locally generated ICMP messages while socket is held.
362 	 */
363 	if (sock_owned_by_user(sk)) {
364 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
365 			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
366 	}
367 	if (sk->sk_state == TCP_CLOSE)
368 		goto out;
369 
370 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
371 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
372 		goto out;
373 	}
374 
375 	icsk = inet_csk(sk);
376 	tp = tcp_sk(sk);
377 	seq = ntohl(th->seq);
378 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
379 	fastopen = tp->fastopen_rsk;
380 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
381 	if (sk->sk_state != TCP_LISTEN &&
382 	    !between(seq, snd_una, tp->snd_nxt)) {
383 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
384 		goto out;
385 	}
386 
387 	switch (type) {
388 	case ICMP_REDIRECT:
389 		do_redirect(icmp_skb, sk);
390 		goto out;
391 	case ICMP_SOURCE_QUENCH:
392 		/* Just silently ignore these. */
393 		goto out;
394 	case ICMP_PARAMETERPROB:
395 		err = EPROTO;
396 		break;
397 	case ICMP_DEST_UNREACH:
398 		if (code > NR_ICMP_UNREACH)
399 			goto out;
400 
401 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
402 			/* We are not interested in TCP_LISTEN and open_requests
403 			 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
404 			 * they should go through unfragmented).
405 			 */
406 			if (sk->sk_state == TCP_LISTEN)
407 				goto out;
408 
409 			tp->mtu_info = info;
410 			if (!sock_owned_by_user(sk)) {
411 				tcp_v4_mtu_reduced(sk);
412 			} else {
413 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
414 					sock_hold(sk);
415 			}
416 			goto out;
417 		}
418 
419 		err = icmp_err_convert[code].errno;
420 		/* check if icmp_skb allows revert of backoff
421 		 * (see draft-zimmermann-tcp-lcd) */
422 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
423 			break;
424 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
425 		    !icsk->icsk_backoff || fastopen)
426 			break;
427 
428 		if (sock_owned_by_user(sk))
429 			break;
430 
431 		icsk->icsk_backoff--;
432 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
433 					       TCP_TIMEOUT_INIT;
434 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
435 
436 		skb = tcp_write_queue_head(sk);
437 		BUG_ON(!skb);
438 
439 		remaining = icsk->icsk_rto -
440 			    min(icsk->icsk_rto,
441 				tcp_time_stamp - tcp_skb_timestamp(skb));
442 
443 		if (remaining) {
444 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
445 						  remaining, TCP_RTO_MAX);
446 		} else {
447 			/* The RTO revert left no time remaining;
448 			 * retransmit now. */
449 			tcp_retransmit_timer(sk);
450 		}
451 
452 		break;
453 	case ICMP_TIME_EXCEEDED:
454 		err = EHOSTUNREACH;
455 		break;
456 	default:
457 		goto out;
458 	}
459 
460 	switch (sk->sk_state) {
461 		struct request_sock *req, **prev;
462 	case TCP_LISTEN:
463 		if (sock_owned_by_user(sk))
464 			goto out;
465 
466 		req = inet_csk_search_req(sk, &prev, th->dest,
467 					  iph->daddr, iph->saddr);
468 		if (!req)
469 			goto out;
470 
471 		/* ICMPs are not backlogged, hence we cannot get
472 		   an established socket here.
473 		 */
474 		WARN_ON(req->sk);
475 
476 		if (seq != tcp_rsk(req)->snt_isn) {
477 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
478 			goto out;
479 		}
480 
481 		/*
482 		 * Still in SYN_RECV, just remove it silently.
483 		 * There is no good way to pass the error to the newly
484 		 * created socket, and POSIX does not want network
485 		 * errors returned from accept().
486 		 */
487 		inet_csk_reqsk_queue_drop(sk, req, prev);
488 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
489 		goto out;
490 
491 	case TCP_SYN_SENT:
492 	case TCP_SYN_RECV:
493 		/* Only in fast or simultaneous open. If a fast open socket is
494 		 * already accepted it is treated as a connected one below.
495 		 */
496 		if (fastopen && fastopen->sk == NULL)
497 			break;
498 
499 		if (!sock_owned_by_user(sk)) {
500 			sk->sk_err = err;
501 
502 			sk->sk_error_report(sk);
503 
504 			tcp_done(sk);
505 		} else {
506 			sk->sk_err_soft = err;
507 		}
508 		goto out;
509 	}
510 
511 	/* If we've already connected we will keep trying
512 	 * until we time out, or the user gives up.
513 	 *
514 	 * rfc1122 4.2.3.9 allows us to consider as hard errors
515 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
516 	 * but it is obsoleted by pmtu discovery).
517 	 *
518 	 * Note that in the modern internet, where routing is unreliable
519 	 * and broken firewalls sit in every dark corner sending random
520 	 * errors ordered by their masters, even these two messages have
521 	 * finally lost their original sense (even Linux sends invalid PORT_UNREACHs).
522 	 *
523 	 * Now we are in compliance with RFCs.
524 	 *							--ANK (980905)
525 	 */
526 
527 	inet = inet_sk(sk);
528 	if (!sock_owned_by_user(sk) && inet->recverr) {
529 		sk->sk_err = err;
530 		sk->sk_error_report(sk);
531 	} else	{ /* Only an error on timeout */
532 		sk->sk_err_soft = err;
533 	}
534 
535 out:
536 	bh_unlock_sock(sk);
537 	sock_put(sk);
538 }
539 
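/* Fill in the TCP checksum for an outgoing skb: leave a partial checksum for
 * the device to finish when hardware offload is in use (CHECKSUM_PARTIAL),
 * otherwise fold the full checksum in software.
 */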
540 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
541 {
542 	struct tcphdr *th = tcp_hdr(skb);
543 
544 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
545 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
546 		skb->csum_start = skb_transport_header(skb) - skb->head;
547 		skb->csum_offset = offsetof(struct tcphdr, check);
548 	} else {
549 		th->check = tcp_v4_check(skb->len, saddr, daddr,
550 					 csum_partial(th,
551 						      th->doff << 2,
552 						      skb->csum));
553 	}
554 }
555 
556 /* This routine computes an IPv4 TCP checksum. */
557 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
558 {
559 	const struct inet_sock *inet = inet_sk(sk);
560 
561 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
562 }
563 EXPORT_SYMBOL(tcp_v4_send_check);
564 
565 /*
566  *	This routine will send an RST to the other tcp.
567  *
568  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
569  *		      for the reset.
570  *	Answer: if a packet caused an RST, it is not for a socket
571  *		existing in our system; if it is matched to a socket,
572  *		it is just a duplicate segment or a bug in the other side's TCP.
573  *		So we build the reply based only on the parameters that
574  *		arrived with the segment.
575  *	Exception: precedence violation. We do not implement it in any case.
576  */
577 
578 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
579 {
580 	const struct tcphdr *th = tcp_hdr(skb);
581 	struct {
582 		struct tcphdr th;
583 #ifdef CONFIG_TCP_MD5SIG
584 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
585 #endif
586 	} rep;
587 	struct ip_reply_arg arg;
588 #ifdef CONFIG_TCP_MD5SIG
589 	struct tcp_md5sig_key *key;
590 	const __u8 *hash_location = NULL;
591 	unsigned char newhash[16];
592 	int genhash;
593 	struct sock *sk1 = NULL;
594 #endif
595 	struct net *net;
596 
597 	/* Never send a reset in response to a reset. */
598 	if (th->rst)
599 		return;
600 
601 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
602 		return;
603 
604 	/* Swap the send and the receive. */
605 	memset(&rep, 0, sizeof(rep));
606 	rep.th.dest   = th->source;
607 	rep.th.source = th->dest;
608 	rep.th.doff   = sizeof(struct tcphdr) / 4;
609 	rep.th.rst    = 1;
610 
611 	if (th->ack) {
612 		rep.th.seq = th->ack_seq;
613 	} else {
614 		rep.th.ack = 1;
615 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
616 				       skb->len - (th->doff << 2));
617 	}
618 
619 	memset(&arg, 0, sizeof(arg));
620 	arg.iov[0].iov_base = (unsigned char *)&rep;
621 	arg.iov[0].iov_len  = sizeof(rep.th);
622 
623 #ifdef CONFIG_TCP_MD5SIG
624 	hash_location = tcp_parse_md5sig_option(th);
625 	if (!sk && hash_location) {
626 		/*
627 		 * The active side is lost. Try to find the listening socket through
628 		 * the source port, and then find the md5 key through that socket.
629 		 * We do not lose any security here:
630 		 * the incoming packet is checked against the md5 hash of the key
631 		 * we find, and no RST is generated if the md5 hash doesn't match.
632 		 */
633 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
634 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
635 					     th->source, ip_hdr(skb)->daddr,
636 					     ntohs(th->source), inet_iif(skb));
637 		/* don't send an rst if we can't find the key */
638 		if (!sk1)
639 			return;
640 		rcu_read_lock();
641 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
642 					&ip_hdr(skb)->saddr, AF_INET);
643 		if (!key)
644 			goto release_sk1;
645 
646 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
647 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
648 			goto release_sk1;
649 	} else {
650 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
651 					     &ip_hdr(skb)->saddr,
652 					     AF_INET) : NULL;
653 	}
654 
655 	if (key) {
656 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
657 				   (TCPOPT_NOP << 16) |
658 				   (TCPOPT_MD5SIG << 8) |
659 				   TCPOLEN_MD5SIG);
660 		/* Update length and the length the header thinks exists */
661 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
662 		rep.th.doff = arg.iov[0].iov_len / 4;
663 
664 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
665 				     key, ip_hdr(skb)->saddr,
666 				     ip_hdr(skb)->daddr, &rep.th);
667 	}
668 #endif
669 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
670 				      ip_hdr(skb)->saddr, /* XXX */
671 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
672 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
673 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
674 	/* When the socket is gone, all binding information is lost and
675 	 * routing might fail. No choice here: if we chose to force the
676 	 * input interface, we would misroute in the asymmetric-route case.
677 	 */
678 	if (sk)
679 		arg.bound_dev_if = sk->sk_bound_dev_if;
680 
681 	net = dev_net(skb_dst(skb)->dev);
682 	arg.tos = ip_hdr(skb)->tos;
683 	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
684 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
685 			      &arg, arg.iov[0].iov_len);
686 
687 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
688 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
689 
690 #ifdef CONFIG_TCP_MD5SIG
691 release_sk1:
692 	if (sk1) {
693 		rcu_read_unlock();
694 		sock_put(sk1);
695 	}
696 #endif
697 }
698 
699 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
700    outside socket context, is certainly ugly. What can I do?
701  */
702 
703 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
704 			    u32 win, u32 tsval, u32 tsecr, int oif,
705 			    struct tcp_md5sig_key *key,
706 			    int reply_flags, u8 tos)
707 {
708 	const struct tcphdr *th = tcp_hdr(skb);
709 	struct {
710 		struct tcphdr th;
711 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
712 #ifdef CONFIG_TCP_MD5SIG
713 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
714 #endif
715 			];
716 	} rep;
717 	struct ip_reply_arg arg;
718 	struct net *net = dev_net(skb_dst(skb)->dev);
719 
720 	memset(&rep.th, 0, sizeof(struct tcphdr));
721 	memset(&arg, 0, sizeof(arg));
722 
723 	arg.iov[0].iov_base = (unsigned char *)&rep;
724 	arg.iov[0].iov_len  = sizeof(rep.th);
725 	if (tsecr) {
726 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
727 				   (TCPOPT_TIMESTAMP << 8) |
728 				   TCPOLEN_TIMESTAMP);
729 		rep.opt[1] = htonl(tsval);
730 		rep.opt[2] = htonl(tsecr);
731 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
732 	}
733 
734 	/* Swap the send and the receive. */
735 	rep.th.dest    = th->source;
736 	rep.th.source  = th->dest;
737 	rep.th.doff    = arg.iov[0].iov_len / 4;
738 	rep.th.seq     = htonl(seq);
739 	rep.th.ack_seq = htonl(ack);
740 	rep.th.ack     = 1;
741 	rep.th.window  = htons(win);
742 
743 #ifdef CONFIG_TCP_MD5SIG
744 	if (key) {
745 		int offset = (tsecr) ? 3 : 0;
746 
747 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
748 					  (TCPOPT_NOP << 16) |
749 					  (TCPOPT_MD5SIG << 8) |
750 					  TCPOLEN_MD5SIG);
751 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
752 		rep.th.doff = arg.iov[0].iov_len/4;
753 
754 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
755 				    key, ip_hdr(skb)->saddr,
756 				    ip_hdr(skb)->daddr, &rep.th);
757 	}
758 #endif
759 	arg.flags = reply_flags;
760 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
761 				      ip_hdr(skb)->saddr, /* XXX */
762 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
763 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
764 	if (oif)
765 		arg.bound_dev_if = oif;
766 	arg.tos = tos;
767 	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
768 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
769 			      &arg, arg.iov[0].iov_len);
770 
771 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
772 }
773 
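/* Send an ACK on behalf of a TIME-WAIT socket, using the sequence numbers,
 * window and timestamps preserved in the timewait bucket.
 */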
774 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
775 {
776 	struct inet_timewait_sock *tw = inet_twsk(sk);
777 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
778 
779 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
780 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
781 			tcp_time_stamp + tcptw->tw_ts_offset,
782 			tcptw->tw_ts_recent,
783 			tw->tw_bound_dev_if,
784 			tcp_twsk_md5_key(tcptw),
785 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
786 			tw->tw_tos
787 			);
788 
789 	inet_twsk_put(tw);
790 }
791 
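/* ACK an incoming segment for a connection request that is still in
 * SYN-RECV (including the Fast Open case noted below).
 */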
792 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
793 				  struct request_sock *req)
794 {
795 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
796 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
797 	 */
798 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
799 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
800 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
801 			tcp_time_stamp,
802 			req->ts_recent,
803 			0,
804 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
805 					  AF_INET),
806 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
807 			ip_hdr(skb)->tos);
808 }
809 
810 /*
811  *	Send a SYN-ACK after having received a SYN.
812  *	This still operates on a request_sock only, not on a big
813  *	socket.
814  */
815 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
816 			      struct flowi *fl,
817 			      struct request_sock *req,
818 			      u16 queue_mapping,
819 			      struct tcp_fastopen_cookie *foc)
820 {
821 	const struct inet_request_sock *ireq = inet_rsk(req);
822 	struct flowi4 fl4;
823 	int err = -1;
824 	struct sk_buff *skb;
825 
826 	/* First, grab a route. */
827 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
828 		return -1;
829 
830 	skb = tcp_make_synack(sk, dst, req, foc);
831 
832 	if (skb) {
833 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
834 
835 		skb_set_queue_mapping(skb, queue_mapping);
836 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
837 					    ireq->ir_rmt_addr,
838 					    ireq->opt);
839 		err = net_xmit_eval(err);
840 	}
841 
842 	return err;
843 }
844 
845 /*
846  *	IPv4 request_sock destructor.
847  */
848 static void tcp_v4_reqsk_destructor(struct request_sock *req)
849 {
850 	kfree(inet_rsk(req)->opt);
851 }
852 
853 /*
854  * Return true if a syncookie should be sent
855  */
856 bool tcp_syn_flood_action(struct sock *sk,
857 			 const struct sk_buff *skb,
858 			 const char *proto)
859 {
860 	const char *msg = "Dropping request";
861 	bool want_cookie = false;
862 	struct listen_sock *lopt;
863 
864 #ifdef CONFIG_SYN_COOKIES
865 	if (sysctl_tcp_syncookies) {
866 		msg = "Sending cookies";
867 		want_cookie = true;
868 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
869 	} else
870 #endif
871 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
872 
873 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
874 	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
875 		lopt->synflood_warned = 1;
876 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
877 			proto, ntohs(tcp_hdr(skb)->dest), msg);
878 	}
879 	return want_cookie;
880 }
881 EXPORT_SYMBOL(tcp_syn_flood_action);
882 
883 #ifdef CONFIG_TCP_MD5SIG
884 /*
885  * RFC2385 MD5 checksumming requires a mapping of
886  * IP address->MD5 Key.
887  * We need to maintain these in the sk structure.
888  */
889 
890 /* Find the Key structure for an address.  */
891 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
892 					 const union tcp_md5_addr *addr,
893 					 int family)
894 {
895 	struct tcp_sock *tp = tcp_sk(sk);
896 	struct tcp_md5sig_key *key;
897 	unsigned int size = sizeof(struct in_addr);
898 	struct tcp_md5sig_info *md5sig;
899 
900 	/* caller either holds rcu_read_lock() or socket lock */
901 	md5sig = rcu_dereference_check(tp->md5sig_info,
902 				       sock_owned_by_user(sk) ||
903 				       lockdep_is_held(&sk->sk_lock.slock));
904 	if (!md5sig)
905 		return NULL;
906 #if IS_ENABLED(CONFIG_IPV6)
907 	if (family == AF_INET6)
908 		size = sizeof(struct in6_addr);
909 #endif
910 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
911 		if (key->family != family)
912 			continue;
913 		if (!memcmp(&key->addr, addr, size))
914 			return key;
915 	}
916 	return NULL;
917 }
918 EXPORT_SYMBOL(tcp_md5_do_lookup);
919 
920 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
921 					 struct sock *addr_sk)
922 {
923 	union tcp_md5_addr *addr;
924 
925 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
926 	return tcp_md5_do_lookup(sk, addr, AF_INET);
927 }
928 EXPORT_SYMBOL(tcp_v4_md5_lookup);
929 
930 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
931 						      struct request_sock *req)
932 {
933 	union tcp_md5_addr *addr;
934 
935 	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
936 	return tcp_md5_do_lookup(sk, addr, AF_INET);
937 }
938 
939 /* This can be called on a newly created socket, from other files */
940 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
941 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
942 {
943 	/* Add Key to the list */
944 	struct tcp_md5sig_key *key;
945 	struct tcp_sock *tp = tcp_sk(sk);
946 	struct tcp_md5sig_info *md5sig;
947 
948 	key = tcp_md5_do_lookup(sk, addr, family);
949 	if (key) {
950 		/* Pre-existing entry - just update that one. */
951 		memcpy(key->key, newkey, newkeylen);
952 		key->keylen = newkeylen;
953 		return 0;
954 	}
955 
956 	md5sig = rcu_dereference_protected(tp->md5sig_info,
957 					   sock_owned_by_user(sk));
958 	if (!md5sig) {
959 		md5sig = kmalloc(sizeof(*md5sig), gfp);
960 		if (!md5sig)
961 			return -ENOMEM;
962 
963 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
964 		INIT_HLIST_HEAD(&md5sig->head);
965 		rcu_assign_pointer(tp->md5sig_info, md5sig);
966 	}
967 
968 	key = sock_kmalloc(sk, sizeof(*key), gfp);
969 	if (!key)
970 		return -ENOMEM;
971 	if (!tcp_alloc_md5sig_pool()) {
972 		sock_kfree_s(sk, key, sizeof(*key));
973 		return -ENOMEM;
974 	}
975 
976 	memcpy(key->key, newkey, newkeylen);
977 	key->keylen = newkeylen;
978 	key->family = family;
979 	memcpy(&key->addr, addr,
980 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
981 				      sizeof(struct in_addr));
982 	hlist_add_head_rcu(&key->node, &md5sig->head);
983 	return 0;
984 }
985 EXPORT_SYMBOL(tcp_md5_do_add);
986 
987 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
988 {
989 	struct tcp_md5sig_key *key;
990 
991 	key = tcp_md5_do_lookup(sk, addr, family);
992 	if (!key)
993 		return -ENOENT;
994 	hlist_del_rcu(&key->node);
995 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
996 	kfree_rcu(key, rcu);
997 	return 0;
998 }
999 EXPORT_SYMBOL(tcp_md5_do_del);
1000 
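/* Release every MD5 key attached to the socket; used when the socket itself
 * is being destroyed (see tcp_v4_destroy_sock()).
 */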
1001 static void tcp_clear_md5_list(struct sock *sk)
1002 {
1003 	struct tcp_sock *tp = tcp_sk(sk);
1004 	struct tcp_md5sig_key *key;
1005 	struct hlist_node *n;
1006 	struct tcp_md5sig_info *md5sig;
1007 
1008 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1009 
1010 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1011 		hlist_del_rcu(&key->node);
1012 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1013 		kfree_rcu(key, rcu);
1014 	}
1015 }
1016 
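/* setsockopt(TCP_MD5SIG) handler: copy the key description from user space
 * and add, replace or delete the per-peer key accordingly.
 */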
1017 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1018 				 int optlen)
1019 {
1020 	struct tcp_md5sig cmd;
1021 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1022 
1023 	if (optlen < sizeof(cmd))
1024 		return -EINVAL;
1025 
1026 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1027 		return -EFAULT;
1028 
1029 	if (sin->sin_family != AF_INET)
1030 		return -EINVAL;
1031 
1032 	if (!cmd.tcpm_keylen)
1033 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1034 				      AF_INET);
1035 
1036 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1037 		return -EINVAL;
1038 
1039 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1040 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1041 			      GFP_KERNEL);
1042 }
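
/* A minimal user-space sketch of exercising the parser above via
 * setsockopt(TCP_MD5SIG). Illustrative only; it assumes the uapi
 * struct tcp_md5sig layout and an existing TCP socket fd:
 *
 *	struct tcp_md5sig md5 = { 0 };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	peer->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	md5.tcpm_keylen = strlen("secret");
 *	memcpy(md5.tcpm_key, "secret", md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing a zero tcpm_keylen deletes the key, mirroring the
 * tcp_md5_do_del() path above.
 */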
1043 
1044 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1045 					__be32 daddr, __be32 saddr, int nbytes)
1046 {
1047 	struct tcp4_pseudohdr *bp;
1048 	struct scatterlist sg;
1049 
1050 	bp = &hp->md5_blk.ip4;
1051 
1052 	/*
1053 	 * 1. the TCP pseudo-header (in the order: source IP address,
1054 	 * destination IP address, zero-padded protocol number, and
1055 	 * segment length)
1056 	 */
1057 	bp->saddr = saddr;
1058 	bp->daddr = daddr;
1059 	bp->pad = 0;
1060 	bp->protocol = IPPROTO_TCP;
1061 	bp->len = cpu_to_be16(nbytes);
1062 
1063 	sg_init_one(&sg, bp, sizeof(*bp));
1064 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1065 }
1066 
1067 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1068 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1069 {
1070 	struct tcp_md5sig_pool *hp;
1071 	struct hash_desc *desc;
1072 
1073 	hp = tcp_get_md5sig_pool();
1074 	if (!hp)
1075 		goto clear_hash_noput;
1076 	desc = &hp->md5_desc;
1077 
1078 	if (crypto_hash_init(desc))
1079 		goto clear_hash;
1080 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1081 		goto clear_hash;
1082 	if (tcp_md5_hash_header(hp, th))
1083 		goto clear_hash;
1084 	if (tcp_md5_hash_key(hp, key))
1085 		goto clear_hash;
1086 	if (crypto_hash_final(desc, md5_hash))
1087 		goto clear_hash;
1088 
1089 	tcp_put_md5sig_pool();
1090 	return 0;
1091 
1092 clear_hash:
1093 	tcp_put_md5sig_pool();
1094 clear_hash_noput:
1095 	memset(md5_hash, 0, 16);
1096 	return 1;
1097 }
1098 
1099 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1100 			const struct sock *sk, const struct request_sock *req,
1101 			const struct sk_buff *skb)
1102 {
1103 	struct tcp_md5sig_pool *hp;
1104 	struct hash_desc *desc;
1105 	const struct tcphdr *th = tcp_hdr(skb);
1106 	__be32 saddr, daddr;
1107 
1108 	if (sk) {
1109 		saddr = inet_sk(sk)->inet_saddr;
1110 		daddr = inet_sk(sk)->inet_daddr;
1111 	} else if (req) {
1112 		saddr = inet_rsk(req)->ir_loc_addr;
1113 		daddr = inet_rsk(req)->ir_rmt_addr;
1114 	} else {
1115 		const struct iphdr *iph = ip_hdr(skb);
1116 		saddr = iph->saddr;
1117 		daddr = iph->daddr;
1118 	}
1119 
1120 	hp = tcp_get_md5sig_pool();
1121 	if (!hp)
1122 		goto clear_hash_noput;
1123 	desc = &hp->md5_desc;
1124 
1125 	if (crypto_hash_init(desc))
1126 		goto clear_hash;
1127 
1128 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1129 		goto clear_hash;
1130 	if (tcp_md5_hash_header(hp, th))
1131 		goto clear_hash;
1132 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1133 		goto clear_hash;
1134 	if (tcp_md5_hash_key(hp, key))
1135 		goto clear_hash;
1136 	if (crypto_hash_final(desc, md5_hash))
1137 		goto clear_hash;
1138 
1139 	tcp_put_md5sig_pool();
1140 	return 0;
1141 
1142 clear_hash:
1143 	tcp_put_md5sig_pool();
1144 clear_hash_noput:
1145 	memset(md5_hash, 0, 16);
1146 	return 1;
1147 }
1148 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1149 
1150 static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
1151 				      const struct sk_buff *skb)
1152 {
1153 	/*
1154 	 * This gets called for each TCP segment that arrives
1155 	 * so we want to be efficient.
1156 	 * We have 3 drop cases:
1157 	 * o No MD5 hash and one expected.
1158 	 * o MD5 hash and we're not expecting one.
1159 	 * o MD5 hash and it's wrong.
1160 	 */
1161 	const __u8 *hash_location = NULL;
1162 	struct tcp_md5sig_key *hash_expected;
1163 	const struct iphdr *iph = ip_hdr(skb);
1164 	const struct tcphdr *th = tcp_hdr(skb);
1165 	int genhash;
1166 	unsigned char newhash[16];
1167 
1168 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1169 					  AF_INET);
1170 	hash_location = tcp_parse_md5sig_option(th);
1171 
1172 	/* We've parsed the options - do we have a hash? */
1173 	if (!hash_expected && !hash_location)
1174 		return false;
1175 
1176 	if (hash_expected && !hash_location) {
1177 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1178 		return true;
1179 	}
1180 
1181 	if (!hash_expected && hash_location) {
1182 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1183 		return true;
1184 	}
1185 
1186 	/* Okay, so this is hash_expected and hash_location -
1187 	 * so we need to calculate the checksum.
1188 	 */
1189 	genhash = tcp_v4_md5_hash_skb(newhash,
1190 				      hash_expected,
1191 				      NULL, NULL, skb);
1192 
1193 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1194 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1195 				     &iph->saddr, ntohs(th->source),
1196 				     &iph->daddr, ntohs(th->dest),
1197 				     genhash ? " tcp_v4_calc_md5_hash failed"
1198 				     : "");
1199 		return true;
1200 	}
1201 	return false;
1202 }
1203 
1204 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1205 {
1206 	bool ret;
1207 
1208 	rcu_read_lock();
1209 	ret = __tcp_v4_inbound_md5_hash(sk, skb);
1210 	rcu_read_unlock();
1211 
1212 	return ret;
1213 }
1214 
1215 #endif
1216 
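/* Fill in the IPv4-specific parts of a freshly created request sock from the
 * incoming SYN: addresses, transparent-proxy flag and saved IP options.
 */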
1217 static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1218 			    struct sk_buff *skb)
1219 {
1220 	struct inet_request_sock *ireq = inet_rsk(req);
1221 
1222 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1223 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1224 	ireq->no_srccheck = inet_sk(sk)->transparent;
1225 	ireq->opt = tcp_v4_save_options(skb);
1226 }
1227 
1228 static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1229 					  const struct request_sock *req,
1230 					  bool *strict)
1231 {
1232 	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1233 
1234 	if (strict) {
1235 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1236 			*strict = true;
1237 		else
1238 			*strict = false;
1239 	}
1240 
1241 	return dst;
1242 }
1243 
1244 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1245 	.family		=	PF_INET,
1246 	.obj_size	=	sizeof(struct tcp_request_sock),
1247 	.rtx_syn_ack	=	tcp_rtx_synack,
1248 	.send_ack	=	tcp_v4_reqsk_send_ack,
1249 	.destructor	=	tcp_v4_reqsk_destructor,
1250 	.send_reset	=	tcp_v4_send_reset,
1251 	.syn_ack_timeout =	tcp_syn_ack_timeout,
1252 };
1253 
1254 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1255 	.mss_clamp	=	TCP_MSS_DEFAULT,
1256 #ifdef CONFIG_TCP_MD5SIG
1257 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1258 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1259 #endif
1260 	.init_req	=	tcp_v4_init_req,
1261 #ifdef CONFIG_SYN_COOKIES
1262 	.cookie_init_seq =	cookie_v4_init_sequence,
1263 #endif
1264 	.route_req	=	tcp_v4_route_req,
1265 	.init_seq	=	tcp_v4_init_sequence,
1266 	.send_synack	=	tcp_v4_send_synack,
1267 	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
1268 };
1269 
1270 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1271 {
1272 	/* Never answer SYNs sent to broadcast or multicast addresses */
1273 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1274 		goto drop;
1275 
1276 	return tcp_conn_request(&tcp_request_sock_ops,
1277 				&tcp_request_sock_ipv4_ops, sk, skb);
1278 
1279 drop:
1280 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1281 	return 0;
1282 }
1283 EXPORT_SYMBOL(tcp_v4_conn_request);
1284 
1285 
1286 /*
1287  * The three way handshake has completed - we got a valid synack -
1288  * now create the new socket.
1289  */
1290 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1291 				  struct request_sock *req,
1292 				  struct dst_entry *dst)
1293 {
1294 	struct inet_request_sock *ireq;
1295 	struct inet_sock *newinet;
1296 	struct tcp_sock *newtp;
1297 	struct sock *newsk;
1298 #ifdef CONFIG_TCP_MD5SIG
1299 	struct tcp_md5sig_key *key;
1300 #endif
1301 	struct ip_options_rcu *inet_opt;
1302 
1303 	if (sk_acceptq_is_full(sk))
1304 		goto exit_overflow;
1305 
1306 	newsk = tcp_create_openreq_child(sk, req, skb);
1307 	if (!newsk)
1308 		goto exit_nonewsk;
1309 
1310 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1311 	inet_sk_rx_dst_set(newsk, skb);
1312 
1313 	newtp		      = tcp_sk(newsk);
1314 	newinet		      = inet_sk(newsk);
1315 	ireq		      = inet_rsk(req);
1316 	newinet->inet_daddr   = ireq->ir_rmt_addr;
1317 	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1318 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1319 	inet_opt	      = ireq->opt;
1320 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1321 	ireq->opt	      = NULL;
1322 	newinet->mc_index     = inet_iif(skb);
1323 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1324 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1325 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1326 	inet_set_txhash(newsk);
1327 	if (inet_opt)
1328 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1329 	newinet->inet_id = newtp->write_seq ^ jiffies;
1330 
1331 	if (!dst) {
1332 		dst = inet_csk_route_child_sock(sk, newsk, req);
1333 		if (!dst)
1334 			goto put_and_exit;
1335 	} else {
1336 		/* syncookie case: see end of cookie_v4_check() */
1337 	}
1338 	sk_setup_caps(newsk, dst);
1339 
1340 	tcp_sync_mss(newsk, dst_mtu(dst));
1341 	newtp->advmss = dst_metric_advmss(dst);
1342 	if (tcp_sk(sk)->rx_opt.user_mss &&
1343 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1344 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1345 
1346 	tcp_initialize_rcv_mss(newsk);
1347 
1348 #ifdef CONFIG_TCP_MD5SIG
1349 	/* Copy over the MD5 key from the original socket */
1350 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1351 				AF_INET);
1352 	if (key != NULL) {
1353 		/*
1354 		 * We're using one, so create a matching key
1355 		 * on the newsk structure. If we fail to get
1356 		 * memory, then we end up not copying the key
1357 		 * across. Shucks.
1358 		 */
1359 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1360 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1361 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1362 	}
1363 #endif
1364 
1365 	if (__inet_inherit_port(sk, newsk) < 0)
1366 		goto put_and_exit;
1367 	__inet_hash_nolisten(newsk, NULL);
1368 
1369 	return newsk;
1370 
1371 exit_overflow:
1372 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1373 exit_nonewsk:
1374 	dst_release(dst);
1375 exit:
1376 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1377 	return NULL;
1378 put_and_exit:
1379 	inet_csk_prepare_forced_close(newsk);
1380 	tcp_done(newsk);
1381 	goto exit;
1382 }
1383 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1384 
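/* For a segment arriving on a listening socket, look for a matching pending
 * connection request or an already established child socket; with syncookies
 * enabled, a bare ACK may also be validated as a cookie.
 */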
1385 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1386 {
1387 	struct tcphdr *th = tcp_hdr(skb);
1388 	const struct iphdr *iph = ip_hdr(skb);
1389 	struct sock *nsk;
1390 	struct request_sock **prev;
1391 	/* Find possible connection requests. */
1392 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1393 						       iph->saddr, iph->daddr);
1394 	if (req)
1395 		return tcp_check_req(sk, skb, req, prev, false);
1396 
1397 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1398 			th->source, iph->daddr, th->dest, inet_iif(skb));
1399 
1400 	if (nsk) {
1401 		if (nsk->sk_state != TCP_TIME_WAIT) {
1402 			bh_lock_sock(nsk);
1403 			return nsk;
1404 		}
1405 		inet_twsk_put(inet_twsk(nsk));
1406 		return NULL;
1407 	}
1408 
1409 #ifdef CONFIG_SYN_COOKIES
1410 	if (!th->syn)
1411 		sk = cookie_v4_check(sk, skb);
1412 #endif
1413 	return sk;
1414 }
1415 
1416 /* The socket must have its spinlock held when we get
1417  * here.
1418  *
1419  * We have a potential double-lock case here, so even when
1420  * doing backlog processing we use the BH locking scheme.
1421  * This is because we cannot sleep with the original spinlock
1422  * held.
1423  */
1424 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1425 {
1426 	struct sock *rsk;
1427 
1428 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1429 		struct dst_entry *dst = sk->sk_rx_dst;
1430 
1431 		sock_rps_save_rxhash(sk, skb);
1432 		if (dst) {
1433 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1434 			    dst->ops->check(dst, 0) == NULL) {
1435 				dst_release(dst);
1436 				sk->sk_rx_dst = NULL;
1437 			}
1438 		}
1439 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1440 		return 0;
1441 	}
1442 
1443 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1444 		goto csum_err;
1445 
1446 	if (sk->sk_state == TCP_LISTEN) {
1447 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1448 		if (!nsk)
1449 			goto discard;
1450 
1451 		if (nsk != sk) {
1452 			sock_rps_save_rxhash(nsk, skb);
1453 			if (tcp_child_process(sk, nsk, skb)) {
1454 				rsk = nsk;
1455 				goto reset;
1456 			}
1457 			return 0;
1458 		}
1459 	} else
1460 		sock_rps_save_rxhash(sk, skb);
1461 
1462 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1463 		rsk = sk;
1464 		goto reset;
1465 	}
1466 	return 0;
1467 
1468 reset:
1469 	tcp_v4_send_reset(rsk, skb);
1470 discard:
1471 	kfree_skb(skb);
1472 	/* Be careful here. If this function gets more complicated and
1473 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1474 	 * might be destroyed here. This current version compiles correctly,
1475 	 * but you have been warned.
1476 	 */
1477 	return 0;
1478 
1479 csum_err:
1480 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1481 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1482 	goto discard;
1483 }
1484 EXPORT_SYMBOL(tcp_v4_do_rcv);
1485 
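/* Early demux: look up the established socket before routing so its cached
 * input route (sk->sk_rx_dst) can be reused for this skb.
 */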
1486 void tcp_v4_early_demux(struct sk_buff *skb)
1487 {
1488 	const struct iphdr *iph;
1489 	const struct tcphdr *th;
1490 	struct sock *sk;
1491 
1492 	if (skb->pkt_type != PACKET_HOST)
1493 		return;
1494 
1495 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1496 		return;
1497 
1498 	iph = ip_hdr(skb);
1499 	th = tcp_hdr(skb);
1500 
1501 	if (th->doff < sizeof(struct tcphdr) / 4)
1502 		return;
1503 
1504 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1505 				       iph->saddr, th->source,
1506 				       iph->daddr, ntohs(th->dest),
1507 				       skb->skb_iif);
1508 	if (sk) {
1509 		skb->sk = sk;
1510 		skb->destructor = sock_edemux;
1511 		if (sk->sk_state != TCP_TIME_WAIT) {
1512 			struct dst_entry *dst = sk->sk_rx_dst;
1513 
1514 			if (dst)
1515 				dst = dst_check(dst, 0);
1516 			if (dst &&
1517 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1518 				skb_dst_set_noref(skb, dst);
1519 		}
1520 	}
1521 }
1522 
1523 /* Packet is added to VJ-style prequeue for processing in process
1524  * context, if a reader task is waiting. Apparently, this exciting
1525  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1526  * failed somewhere. Latency? Burstiness? Well, at least now we will
1527  * see why it failed. 8)8)				  --ANK
1528  *
1529  */
1530 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1531 {
1532 	struct tcp_sock *tp = tcp_sk(sk);
1533 
1534 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
1535 		return false;
1536 
1537 	if (skb->len <= tcp_hdrlen(skb) &&
1538 	    skb_queue_len(&tp->ucopy.prequeue) == 0)
1539 		return false;
1540 
1541 	/* Before escaping RCU protected region, we need to take care of skb
1542 	 * dst. Prequeue is only enabled for established sockets.
1543 	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst.
1544 	 * Instead of doing a full sk_rx_dst validity check here, let's perform
1545 	 * an optimistic check.
1546 	 */
1547 	if (likely(sk->sk_rx_dst))
1548 		skb_dst_drop(skb);
1549 	else
1550 		skb_dst_force(skb);
1551 
1552 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
1553 	tp->ucopy.memory += skb->truesize;
1554 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
1555 		struct sk_buff *skb1;
1556 
1557 		BUG_ON(sock_owned_by_user(sk));
1558 
1559 		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1560 			sk_backlog_rcv(sk, skb1);
1561 			NET_INC_STATS_BH(sock_net(sk),
1562 					 LINUX_MIB_TCPPREQUEUEDROPPED);
1563 		}
1564 
1565 		tp->ucopy.memory = 0;
1566 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1567 		wake_up_interruptible_sync_poll(sk_sleep(sk),
1568 					   POLLIN | POLLRDNORM | POLLRDBAND);
1569 		if (!inet_csk_ack_scheduled(sk))
1570 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1571 						  (3 * tcp_rto_min(sk)) / 4,
1572 						  TCP_RTO_MAX);
1573 	}
1574 	return true;
1575 }
1576 EXPORT_SYMBOL(tcp_prequeue);
1577 
1578 /*
1579  *	From tcp_input.c
1580  */
1581 
1582 int tcp_v4_rcv(struct sk_buff *skb)
1583 {
1584 	const struct iphdr *iph;
1585 	const struct tcphdr *th;
1586 	struct sock *sk;
1587 	int ret;
1588 	struct net *net = dev_net(skb->dev);
1589 
1590 	if (skb->pkt_type != PACKET_HOST)
1591 		goto discard_it;
1592 
1593 	/* Count it even if it's bad */
1594 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1595 
1596 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1597 		goto discard_it;
1598 
1599 	th = tcp_hdr(skb);
1600 
1601 	if (th->doff < sizeof(struct tcphdr) / 4)
1602 		goto bad_packet;
1603 	if (!pskb_may_pull(skb, th->doff * 4))
1604 		goto discard_it;
1605 
1606 	/* An explanation is required here, I think.
1607 	 * Packet length and doff are validated by header prediction,
1608 	 * provided the case of th->doff == 0 is eliminated.
1609 	 * So, we defer the checks. */
1610 
1611 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1612 		goto csum_error;
1613 
1614 	th = tcp_hdr(skb);
1615 	iph = ip_hdr(skb);
1616 	/* This is tricky: we move the IPCB to its correct location inside TCP_SKB_CB().
1617 	 * barrier() makes sure the compiler won't play fool^Waliasing games.
1618 	 */
1619 	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1620 		sizeof(struct inet_skb_parm));
1621 	barrier();
1622 
1623 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1624 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1625 				    skb->len - th->doff * 4);
1626 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1627 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1628 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1629 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1630 	TCP_SKB_CB(skb)->sacked	 = 0;
1631 
1632 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1633 	if (!sk)
1634 		goto no_tcp_socket;
1635 
1636 process:
1637 	if (sk->sk_state == TCP_TIME_WAIT)
1638 		goto do_time_wait;
1639 
1640 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1641 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1642 		goto discard_and_relse;
1643 	}
1644 
1645 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1646 		goto discard_and_relse;
1647 
1648 #ifdef CONFIG_TCP_MD5SIG
1649 	/*
1650 	 * We really want to reject the packet as early as possible
1651 	 * if:
1652 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1653 	 *  o There is an MD5 option and we're not expecting one
1654 	 */
1655 	if (tcp_v4_inbound_md5_hash(sk, skb))
1656 		goto discard_and_relse;
1657 #endif
1658 
1659 	nf_reset(skb);
1660 
1661 	if (sk_filter(sk, skb))
1662 		goto discard_and_relse;
1663 
1664 	sk_mark_napi_id(sk, skb);
1665 	skb->dev = NULL;
1666 
1667 	bh_lock_sock_nested(sk);
1668 	ret = 0;
1669 	if (!sock_owned_by_user(sk)) {
1670 		if (!tcp_prequeue(sk, skb))
1671 			ret = tcp_v4_do_rcv(sk, skb);
1672 	} else if (unlikely(sk_add_backlog(sk, skb,
1673 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1674 		bh_unlock_sock(sk);
1675 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1676 		goto discard_and_relse;
1677 	}
1678 	bh_unlock_sock(sk);
1679 
1680 	sock_put(sk);
1681 
1682 	return ret;
1683 
1684 no_tcp_socket:
1685 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1686 		goto discard_it;
1687 
1688 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1689 csum_error:
1690 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1691 bad_packet:
1692 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1693 	} else {
1694 		tcp_v4_send_reset(NULL, skb);
1695 	}
1696 
1697 discard_it:
1698 	/* Discard frame. */
1699 	kfree_skb(skb);
1700 	return 0;
1701 
1702 discard_and_relse:
1703 	sock_put(sk);
1704 	goto discard_it;
1705 
1706 do_time_wait:
1707 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1708 		inet_twsk_put(inet_twsk(sk));
1709 		goto discard_it;
1710 	}
1711 
1712 	if (skb->len < (th->doff << 2)) {
1713 		inet_twsk_put(inet_twsk(sk));
1714 		goto bad_packet;
1715 	}
1716 	if (tcp_checksum_complete(skb)) {
1717 		inet_twsk_put(inet_twsk(sk));
1718 		goto csum_error;
1719 	}
1720 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1721 	case TCP_TW_SYN: {
1722 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1723 							&tcp_hashinfo,
1724 							iph->saddr, th->source,
1725 							iph->daddr, th->dest,
1726 							inet_iif(skb));
1727 		if (sk2) {
1728 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1729 			inet_twsk_put(inet_twsk(sk));
1730 			sk = sk2;
1731 			goto process;
1732 		}
1733 		/* Fall through to ACK */
1734 	}
1735 	case TCP_TW_ACK:
1736 		tcp_v4_timewait_ack(sk, skb);
1737 		break;
1738 	case TCP_TW_RST:
1739 		goto no_tcp_socket;
1740 	case TCP_TW_SUCCESS:;
1741 	}
1742 	goto discard_it;
1743 }
1744 
1745 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1746 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1747 	.twsk_unique	= tcp_twsk_unique,
1748 	.twsk_destructor= tcp_twsk_destructor,
1749 };
1750 
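/* Cache the validated input route on the socket so the receive path (see
 * tcp_v4_do_rcv() and the early demux above) can skip a full route lookup.
 */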
1751 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1752 {
1753 	struct dst_entry *dst = skb_dst(skb);
1754 
1755 	if (dst) {
1756 		dst_hold(dst);
1757 		sk->sk_rx_dst = dst;
1758 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1759 	}
1760 }
1761 EXPORT_SYMBOL(inet_sk_rx_dst_set);
1762 
1763 const struct inet_connection_sock_af_ops ipv4_specific = {
1764 	.queue_xmit	   = ip_queue_xmit,
1765 	.send_check	   = tcp_v4_send_check,
1766 	.rebuild_header	   = inet_sk_rebuild_header,
1767 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1768 	.conn_request	   = tcp_v4_conn_request,
1769 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1770 	.net_header_len	   = sizeof(struct iphdr),
1771 	.setsockopt	   = ip_setsockopt,
1772 	.getsockopt	   = ip_getsockopt,
1773 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1774 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1775 	.bind_conflict	   = inet_csk_bind_conflict,
1776 #ifdef CONFIG_COMPAT
1777 	.compat_setsockopt = compat_ip_setsockopt,
1778 	.compat_getsockopt = compat_ip_getsockopt,
1779 #endif
1780 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1781 };
1782 EXPORT_SYMBOL(ipv4_specific);
1783 
1784 #ifdef CONFIG_TCP_MD5SIG
1785 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1786 	.md5_lookup		= tcp_v4_md5_lookup,
1787 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1788 	.md5_parse		= tcp_v4_parse_md5_keys,
1789 };
1790 #endif
1791 
1792 /* NOTE: A lot of things are set to zero explicitly by the call to
1793  *       sk_alloc(), so they need not be done here.
1794  */
1795 static int tcp_v4_init_sock(struct sock *sk)
1796 {
1797 	struct inet_connection_sock *icsk = inet_csk(sk);
1798 
1799 	tcp_init_sock(sk);
1800 
1801 	icsk->icsk_af_ops = &ipv4_specific;
1802 
1803 #ifdef CONFIG_TCP_MD5SIG
1804 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1805 #endif
1806 
1807 	return 0;
1808 }
1809 
1810 void tcp_v4_destroy_sock(struct sock *sk)
1811 {
1812 	struct tcp_sock *tp = tcp_sk(sk);
1813 
1814 	tcp_clear_xmit_timers(sk);
1815 
1816 	tcp_cleanup_congestion_control(sk);
1817 
1818 	/* Clean up the write buffer. */
1819 	tcp_write_queue_purge(sk);
1820 
1821 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1822 	__skb_queue_purge(&tp->out_of_order_queue);
1823 
1824 #ifdef CONFIG_TCP_MD5SIG
1825 	/* Clean up the MD5 key list, if any */
1826 	if (tp->md5sig_info) {
1827 		tcp_clear_md5_list(sk);
1828 		kfree_rcu(tp->md5sig_info, rcu);
1829 		tp->md5sig_info = NULL;
1830 	}
1831 #endif
1832 
1833 	/* Clean prequeue, it must be empty really */
1834 	__skb_queue_purge(&tp->ucopy.prequeue);
1835 
1836 	/* Clean up a referenced TCP bind bucket. */
1837 	if (inet_csk(sk)->icsk_bind_hash)
1838 		inet_put_port(sk);
1839 
1840 	BUG_ON(tp->fastopen_rsk != NULL);
1841 
1842 	/* If socket is aborted during connect operation */
1843 	tcp_free_fastopen_req(tp);
1844 
1845 	sk_sockets_allocated_dec(sk);
1846 	sock_release_memcg(sk);
1847 }
1848 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1849 
1850 #ifdef CONFIG_PROC_FS
1851 /* Proc filesystem TCP sock list dumping. */
1852 
1853 /*
1854  * Get the next listener socket following cur.  If cur is NULL, get the
1855  * first socket starting from the bucket given in st->bucket; when
1856  * st->bucket is zero, the very first socket in the hash table is returned.
1857  */
1858 static void *listening_get_next(struct seq_file *seq, void *cur)
1859 {
1860 	struct inet_connection_sock *icsk;
1861 	struct hlist_nulls_node *node;
1862 	struct sock *sk = cur;
1863 	struct inet_listen_hashbucket *ilb;
1864 	struct tcp_iter_state *st = seq->private;
1865 	struct net *net = seq_file_net(seq);
1866 
1867 	if (!sk) {
1868 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1869 		spin_lock_bh(&ilb->lock);
1870 		sk = sk_nulls_head(&ilb->head);
1871 		st->offset = 0;
1872 		goto get_sk;
1873 	}
1874 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1875 	++st->num;
1876 	++st->offset;
1877 
1878 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1879 		struct request_sock *req = cur;
1880 
1881 		icsk = inet_csk(st->syn_wait_sk);
1882 		req = req->dl_next;
1883 		while (1) {
1884 			while (req) {
1885 				if (req->rsk_ops->family == st->family) {
1886 					cur = req;
1887 					goto out;
1888 				}
1889 				req = req->dl_next;
1890 			}
1891 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1892 				break;
1893 get_req:
1894 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1895 		}
1896 		sk	  = sk_nulls_next(st->syn_wait_sk);
1897 		st->state = TCP_SEQ_STATE_LISTENING;
1898 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1899 	} else {
1900 		icsk = inet_csk(sk);
1901 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1902 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1903 			goto start_req;
1904 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1905 		sk = sk_nulls_next(sk);
1906 	}
1907 get_sk:
1908 	sk_nulls_for_each_from(sk, node) {
1909 		if (!net_eq(sock_net(sk), net))
1910 			continue;
1911 		if (sk->sk_family == st->family) {
1912 			cur = sk;
1913 			goto out;
1914 		}
1915 		icsk = inet_csk(sk);
1916 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1917 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1918 start_req:
1919 			st->uid		= sock_i_uid(sk);
1920 			st->syn_wait_sk = sk;
1921 			st->state	= TCP_SEQ_STATE_OPENREQ;
1922 			st->sbucket	= 0;
1923 			goto get_req;
1924 		}
1925 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1926 	}
1927 	spin_unlock_bh(&ilb->lock);
1928 	st->offset = 0;
1929 	if (++st->bucket < INET_LHTABLE_SIZE) {
1930 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1931 		spin_lock_bh(&ilb->lock);
1932 		sk = sk_nulls_head(&ilb->head);
1933 		goto get_sk;
1934 	}
1935 	cur = NULL;
1936 out:
1937 	return cur;
1938 }
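
/*
 * Iterator overview for listening_get_next() above and the
 * established_get_*() helpers below: TCP_SEQ_STATE_LISTENING walks
 * tcp_hashinfo.listening_hash bucket by bucket; when a listener has pending
 * request socks, the state switches to TCP_SEQ_STATE_OPENREQ and that
 * listener's syn_table is walked under syn_wait_lock; finally
 * TCP_SEQ_STATE_ESTABLISHED walks ehash, whose chains also hold TIME_WAIT
 * sockets.
 */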
1939 
1940 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1941 {
1942 	struct tcp_iter_state *st = seq->private;
1943 	void *rc;
1944 
1945 	st->bucket = 0;
1946 	st->offset = 0;
1947 	rc = listening_get_next(seq, NULL);
1948 
1949 	while (rc && *pos) {
1950 		rc = listening_get_next(seq, rc);
1951 		--*pos;
1952 	}
1953 	return rc;
1954 }
1955 
1956 static inline bool empty_bucket(const struct tcp_iter_state *st)
1957 {
1958 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1959 }
1960 
1961 /*
1962  * Get the first established socket, starting from the bucket given in
1963  * st->bucket; if st->bucket is zero, the first socket in the hash is returned.
1964  */
1965 static void *established_get_first(struct seq_file *seq)
1966 {
1967 	struct tcp_iter_state *st = seq->private;
1968 	struct net *net = seq_file_net(seq);
1969 	void *rc = NULL;
1970 
1971 	st->offset = 0;
1972 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1973 		struct sock *sk;
1974 		struct hlist_nulls_node *node;
1975 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1976 
1977 		/* Lockless fast path for the common case of empty buckets */
1978 		if (empty_bucket(st))
1979 			continue;
1980 
1981 		spin_lock_bh(lock);
1982 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1983 			if (sk->sk_family != st->family ||
1984 			    !net_eq(sock_net(sk), net)) {
1985 				continue;
1986 			}
1987 			rc = sk;
1988 			goto out;
1989 		}
1990 		spin_unlock_bh(lock);
1991 	}
1992 out:
1993 	return rc;
1994 }
1995 
1996 static void *established_get_next(struct seq_file *seq, void *cur)
1997 {
1998 	struct sock *sk = cur;
1999 	struct hlist_nulls_node *node;
2000 	struct tcp_iter_state *st = seq->private;
2001 	struct net *net = seq_file_net(seq);
2002 
2003 	++st->num;
2004 	++st->offset;
2005 
2006 	sk = sk_nulls_next(sk);
2007 
2008 	sk_nulls_for_each_from(sk, node) {
2009 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2010 			return sk;
2011 	}
2012 
2013 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2014 	++st->bucket;
2015 	return established_get_first(seq);
2016 }
2017 
2018 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2019 {
2020 	struct tcp_iter_state *st = seq->private;
2021 	void *rc;
2022 
2023 	st->bucket = 0;
2024 	rc = established_get_first(seq);
2025 
2026 	while (rc && pos) {
2027 		rc = established_get_next(seq, rc);
2028 		--pos;
2029 	}
2030 	return rc;
2031 }
2032 
2033 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2034 {
2035 	void *rc;
2036 	struct tcp_iter_state *st = seq->private;
2037 
2038 	st->state = TCP_SEQ_STATE_LISTENING;
2039 	rc	  = listening_get_idx(seq, &pos);
2040 
2041 	if (!rc) {
2042 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2043 		rc	  = established_get_idx(seq, pos);
2044 	}
2045 
2046 	return rc;
2047 }
2048 
2049 static void *tcp_seek_last_pos(struct seq_file *seq)
2050 {
2051 	struct tcp_iter_state *st = seq->private;
2052 	int offset = st->offset;
2053 	int orig_num = st->num;
2054 	void *rc = NULL;
2055 
2056 	switch (st->state) {
2057 	case TCP_SEQ_STATE_OPENREQ:
2058 	case TCP_SEQ_STATE_LISTENING:
2059 		if (st->bucket >= INET_LHTABLE_SIZE)
2060 			break;
2061 		st->state = TCP_SEQ_STATE_LISTENING;
2062 		rc = listening_get_next(seq, NULL);
2063 		while (offset-- && rc)
2064 			rc = listening_get_next(seq, rc);
2065 		if (rc)
2066 			break;
2067 		st->bucket = 0;
2068 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2069 		/* Fallthrough */
2070 	case TCP_SEQ_STATE_ESTABLISHED:
2071 		if (st->bucket > tcp_hashinfo.ehash_mask)
2072 			break;
2073 		rc = established_get_first(seq);
2074 		while (offset-- && rc)
2075 			rc = established_get_next(seq, rc);
2076 	}
2077 
2078 	st->num = orig_num;
2079 
2080 	return rc;
2081 }
2082 
2083 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2084 {
2085 	struct tcp_iter_state *st = seq->private;
2086 	void *rc;
2087 
2088 	if (*pos && *pos == st->last_pos) {
2089 		rc = tcp_seek_last_pos(seq);
2090 		if (rc)
2091 			goto out;
2092 	}
2093 
2094 	st->state = TCP_SEQ_STATE_LISTENING;
2095 	st->num = 0;
2096 	st->bucket = 0;
2097 	st->offset = 0;
2098 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2099 
2100 out:
2101 	st->last_pos = *pos;
2102 	return rc;
2103 }
2104 
2105 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2106 {
2107 	struct tcp_iter_state *st = seq->private;
2108 	void *rc = NULL;
2109 
2110 	if (v == SEQ_START_TOKEN) {
2111 		rc = tcp_get_idx(seq, 0);
2112 		goto out;
2113 	}
2114 
2115 	switch (st->state) {
2116 	case TCP_SEQ_STATE_OPENREQ:
2117 	case TCP_SEQ_STATE_LISTENING:
2118 		rc = listening_get_next(seq, v);
2119 		if (!rc) {
2120 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2121 			st->bucket = 0;
2122 			st->offset = 0;
2123 			rc	  = established_get_first(seq);
2124 		}
2125 		break;
2126 	case TCP_SEQ_STATE_ESTABLISHED:
2127 		rc = established_get_next(seq, v);
2128 		break;
2129 	}
2130 out:
2131 	++*pos;
2132 	st->last_pos = *pos;
2133 	return rc;
2134 }
2135 
2136 static void tcp_seq_stop(struct seq_file *seq, void *v)
2137 {
2138 	struct tcp_iter_state *st = seq->private;
2139 
2140 	switch (st->state) {
2141 	case TCP_SEQ_STATE_OPENREQ:
2142 		if (v) {
2143 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2144 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2145 		}
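		/* Fall through */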
2146 	case TCP_SEQ_STATE_LISTENING:
2147 		if (v != SEQ_START_TOKEN)
2148 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2149 		break;
2150 	case TCP_SEQ_STATE_ESTABLISHED:
2151 		if (v)
2152 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2153 		break;
2154 	}
2155 }
2156 
2157 int tcp_seq_open(struct inode *inode, struct file *file)
2158 {
2159 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2160 	struct tcp_iter_state *s;
2161 	int err;
2162 
2163 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2164 			  sizeof(struct tcp_iter_state));
2165 	if (err < 0)
2166 		return err;
2167 
2168 	s = ((struct seq_file *)file->private_data)->private;
2169 	s->family		= afinfo->family;
2170 	s->last_pos		= 0;
2171 	return 0;
2172 }
2173 EXPORT_SYMBOL(tcp_seq_open);
2174 
2175 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2176 {
2177 	int rc = 0;
2178 	struct proc_dir_entry *p;
2179 
2180 	afinfo->seq_ops.start		= tcp_seq_start;
2181 	afinfo->seq_ops.next		= tcp_seq_next;
2182 	afinfo->seq_ops.stop		= tcp_seq_stop;
2183 
2184 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2185 			     afinfo->seq_fops, afinfo);
2186 	if (!p)
2187 		rc = -ENOMEM;
2188 	return rc;
2189 }
2190 EXPORT_SYMBOL(tcp_proc_register);
2191 
2192 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2193 {
2194 	remove_proc_entry(afinfo->name, net->proc_net);
2195 }
2196 EXPORT_SYMBOL(tcp_proc_unregister);
2197 
2198 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2199 			 struct seq_file *f, int i, kuid_t uid)
2200 {
2201 	const struct inet_request_sock *ireq = inet_rsk(req);
2202 	long delta = req->expires - jiffies;
2203 
2204 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2205 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2206 		i,
2207 		ireq->ir_loc_addr,
2208 		ntohs(inet_sk(sk)->inet_sport),
2209 		ireq->ir_rmt_addr,
2210 		ntohs(ireq->ir_rmt_port),
2211 		TCP_SYN_RECV,
2212 		0, 0, /* could print option size, but that is af dependent. */
2213 		1,    /* timers active (only the expire timer) */
2214 		jiffies_delta_to_clock_t(delta),
2215 		req->num_timeout,
2216 		from_kuid_munged(seq_user_ns(f), uid),
2217 		0,  /* non standard timer */
2218 		0, /* open_requests have no inode */
2219 		atomic_read(&sk->sk_refcnt),
2220 		req);
2221 }
2222 
2223 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2224 {
2225 	int timer_active;
2226 	unsigned long timer_expires;
2227 	const struct tcp_sock *tp = tcp_sk(sk);
2228 	const struct inet_connection_sock *icsk = inet_csk(sk);
2229 	const struct inet_sock *inet = inet_sk(sk);
2230 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2231 	__be32 dest = inet->inet_daddr;
2232 	__be32 src = inet->inet_rcv_saddr;
2233 	__u16 destp = ntohs(inet->inet_dport);
2234 	__u16 srcp = ntohs(inet->inet_sport);
2235 	int rx_queue;
2236 
2237 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2238 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2239 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2240 		timer_active	= 1;
2241 		timer_expires	= icsk->icsk_timeout;
2242 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2243 		timer_active	= 4;
2244 		timer_expires	= icsk->icsk_timeout;
2245 	} else if (timer_pending(&sk->sk_timer)) {
2246 		timer_active	= 2;
2247 		timer_expires	= sk->sk_timer.expires;
2248 	} else {
2249 		timer_active	= 0;
2250 		timer_expires = jiffies;
2251 	}
2252 
2253 	if (sk->sk_state == TCP_LISTEN)
2254 		rx_queue = sk->sk_ack_backlog;
2255 	else
2256 		/*
2257 		 * Because we don't lock the socket, we might find a transient negative value.
2258 		 */
2259 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2260 
2261 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2262 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2263 		i, src, srcp, dest, destp, sk->sk_state,
2264 		tp->write_seq - tp->snd_una,
2265 		rx_queue,
2266 		timer_active,
2267 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2268 		icsk->icsk_retransmits,
2269 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2270 		icsk->icsk_probes_out,
2271 		sock_i_ino(sk),
2272 		atomic_read(&sk->sk_refcnt), sk,
2273 		jiffies_to_clock_t(icsk->icsk_rto),
2274 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2275 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2276 		tp->snd_cwnd,
2277 		sk->sk_state == TCP_LISTEN ?
2278 		    (fastopenq ? fastopenq->max_qlen : 0) :
2279 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2280 }
2281 
2282 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2283 			       struct seq_file *f, int i)
2284 {
2285 	__be32 dest, src;
2286 	__u16 destp, srcp;
2287 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
2288 
2289 	dest  = tw->tw_daddr;
2290 	src   = tw->tw_rcv_saddr;
2291 	destp = ntohs(tw->tw_dport);
2292 	srcp  = ntohs(tw->tw_sport);
2293 
2294 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2295 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2296 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2297 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2298 		atomic_read(&tw->tw_refcnt), tw);
2299 }
2300 
2301 #define TMPSZ 150
2302 
2303 static int tcp4_seq_show(struct seq_file *seq, void *v)
2304 {
2305 	struct tcp_iter_state *st;
2306 	struct sock *sk = v;
2307 
2308 	seq_setwidth(seq, TMPSZ - 1);
2309 	if (v == SEQ_START_TOKEN) {
2310 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2311 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2312 			   "inode");
2313 		goto out;
2314 	}
2315 	st = seq->private;
2316 
2317 	switch (st->state) {
2318 	case TCP_SEQ_STATE_LISTENING:
2319 	case TCP_SEQ_STATE_ESTABLISHED:
2320 		if (sk->sk_state == TCP_TIME_WAIT)
2321 			get_timewait4_sock(v, seq, st->num);
2322 		else
2323 			get_tcp4_sock(v, seq, st->num);
2324 		break;
2325 	case TCP_SEQ_STATE_OPENREQ:
2326 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
2327 		break;
2328 	}
2329 out:
2330 	seq_pad(seq, '\n');
2331 	return 0;
2332 }
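
/*
 * Illustration (userspace sketch, not kernel code): parsing the fixed-width
 * fields that get_tcp4_sock() prints into /proc/net/tcp.  Addresses and
 * ports appear as hex; the printed address is the raw __be32 value, so
 * storing the parsed integer straight into in_addr.s_addr round-trips it on
 * the same machine.  Error handling is intentionally thin.
 *
 *	#include <stdio.h>
 *	#include <arpa/inet.h>
 *
 *	int main(void)
 *	{
 *		FILE *f = fopen("/proc/net/tcp", "r");
 *		char line[256];
 *
 *		if (!f)
 *			return 1;
 *		fgets(line, sizeof(line), f);		// skip header row
 *		while (fgets(line, sizeof(line), f)) {
 *			unsigned int sl, laddr, lport, raddr, rport, st;
 *			struct in_addr a;
 *
 *			if (sscanf(line, " %u: %8X:%4X %8X:%4X %2X",
 *				   &sl, &laddr, &lport, &raddr, &rport, &st) != 6)
 *				continue;
 *			a.s_addr = laddr;		// already in wire byte order
 *			printf("%s:%u state %u\n", inet_ntoa(a), lport, st);
 *		}
 *		fclose(f);
 *		return 0;
 *	}
 */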
2333 
2334 static const struct file_operations tcp_afinfo_seq_fops = {
2335 	.owner   = THIS_MODULE,
2336 	.open    = tcp_seq_open,
2337 	.read    = seq_read,
2338 	.llseek  = seq_lseek,
2339 	.release = seq_release_net
2340 };
2341 
2342 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2343 	.name		= "tcp",
2344 	.family		= AF_INET,
2345 	.seq_fops	= &tcp_afinfo_seq_fops,
2346 	.seq_ops	= {
2347 		.show		= tcp4_seq_show,
2348 	},
2349 };
2350 
2351 static int __net_init tcp4_proc_init_net(struct net *net)
2352 {
2353 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2354 }
2355 
2356 static void __net_exit tcp4_proc_exit_net(struct net *net)
2357 {
2358 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2359 }
2360 
2361 static struct pernet_operations tcp4_net_ops = {
2362 	.init = tcp4_proc_init_net,
2363 	.exit = tcp4_proc_exit_net,
2364 };
2365 
2366 int __init tcp4_proc_init(void)
2367 {
2368 	return register_pernet_subsys(&tcp4_net_ops);
2369 }
2370 
2371 void tcp4_proc_exit(void)
2372 {
2373 	unregister_pernet_subsys(&tcp4_net_ops);
2374 }
2375 #endif /* CONFIG_PROC_FS */
2376 
2377 struct proto tcp_prot = {
2378 	.name			= "TCP",
2379 	.owner			= THIS_MODULE,
2380 	.close			= tcp_close,
2381 	.connect		= tcp_v4_connect,
2382 	.disconnect		= tcp_disconnect,
2383 	.accept			= inet_csk_accept,
2384 	.ioctl			= tcp_ioctl,
2385 	.init			= tcp_v4_init_sock,
2386 	.destroy		= tcp_v4_destroy_sock,
2387 	.shutdown		= tcp_shutdown,
2388 	.setsockopt		= tcp_setsockopt,
2389 	.getsockopt		= tcp_getsockopt,
2390 	.recvmsg		= tcp_recvmsg,
2391 	.sendmsg		= tcp_sendmsg,
2392 	.sendpage		= tcp_sendpage,
2393 	.backlog_rcv		= tcp_v4_do_rcv,
2394 	.release_cb		= tcp_release_cb,
2395 	.hash			= inet_hash,
2396 	.unhash			= inet_unhash,
2397 	.get_port		= inet_csk_get_port,
2398 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2399 	.stream_memory_free	= tcp_stream_memory_free,
2400 	.sockets_allocated	= &tcp_sockets_allocated,
2401 	.orphan_count		= &tcp_orphan_count,
2402 	.memory_allocated	= &tcp_memory_allocated,
2403 	.memory_pressure	= &tcp_memory_pressure,
2404 	.sysctl_mem		= sysctl_tcp_mem,
2405 	.sysctl_wmem		= sysctl_tcp_wmem,
2406 	.sysctl_rmem		= sysctl_tcp_rmem,
2407 	.max_header		= MAX_TCP_HEADER,
2408 	.obj_size		= sizeof(struct tcp_sock),
2409 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2410 	.twsk_prot		= &tcp_timewait_sock_ops,
2411 	.rsk_prot		= &tcp_request_sock_ops,
2412 	.h.hashinfo		= &tcp_hashinfo,
2413 	.no_autobind		= true,
2414 #ifdef CONFIG_COMPAT
2415 	.compat_setsockopt	= compat_tcp_setsockopt,
2416 	.compat_getsockopt	= compat_tcp_getsockopt,
2417 #endif
2418 #ifdef CONFIG_MEMCG_KMEM
2419 	.init_cgroup		= tcp_init_cgroup,
2420 	.destroy_cgroup		= tcp_destroy_cgroup,
2421 	.proto_cgroup		= tcp_proto_cgroup,
2422 #endif
2423 };
2424 EXPORT_SYMBOL(tcp_prot);
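
/*
 * tcp_prot is registered from inet_init() in net/ipv4/af_inet.c (via
 * proto_register() and the SOCK_STREAM entry in inetsw_array); that is how
 * socket(AF_INET, SOCK_STREAM, ...) is routed to the callbacks above.
 */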
2425 
2426 static int __net_init tcp_sk_init(struct net *net)
2427 {
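	/*
	 * tcp_ecn default of 2: accept ECN when requested by incoming
	 * connections, but do not request ECN on outgoing connections.
	 */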
2428 	net->ipv4.sysctl_tcp_ecn = 2;
2429 	return 0;
2430 }
2431 
2432 static void __net_exit tcp_sk_exit(struct net *net)
2433 {
2434 }
2435 
2436 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2437 {
2438 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2439 }
2440 
2441 static struct pernet_operations __net_initdata tcp_sk_ops = {
2442        .init	   = tcp_sk_init,
2443        .exit	   = tcp_sk_exit,
2444        .exit_batch = tcp_sk_exit_batch,
2445 };
2446 
2447 void __init tcp_v4_init(void)
2448 {
2449 	inet_hashinfo_init(&tcp_hashinfo);
2450 	if (register_pernet_subsys(&tcp_sk_ops))
2451 		panic("Failed to create the TCP control socket.\n");
2452 }
2453