xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision 62e7ca52)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					to a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78 #include <net/busy_poll.h>
79 
80 #include <linux/inet.h>
81 #include <linux/ipv6.h>
82 #include <linux/stddef.h>
83 #include <linux/proc_fs.h>
84 #include <linux/seq_file.h>
85 
86 #include <linux/crypto.h>
87 #include <linux/scatterlist.h>
88 
89 int sysctl_tcp_tw_reuse __read_mostly;
90 int sysctl_tcp_low_latency __read_mostly;
91 EXPORT_SYMBOL(sysctl_tcp_low_latency);
92 
93 
94 #ifdef CONFIG_TCP_MD5SIG
95 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
96 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
97 #endif
98 
99 struct inet_hashinfo tcp_hashinfo;
100 EXPORT_SYMBOL(tcp_hashinfo);
101 
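/* Derive the initial sequence number for an incoming connection from the
 * addresses and ports of the received segment.  secure_tcp_sequence_number()
 * is presumably a keyed hash over the 4-tuple plus a clock component, so ISNs
 * are hard to predict (in the spirit of RFC 6528); the exact scheme is
 * implemented outside this file.
 */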
102 static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
103 {
104 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
105 					  ip_hdr(skb)->saddr,
106 					  tcp_hdr(skb)->dest,
107 					  tcp_hdr(skb)->source);
108 }
109 
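/* Decide whether a TIME-WAIT socket may be reused for a new outgoing
 * connection to the same 4-tuple.  If reuse is safe, write_seq is seeded just
 * above the old tw_snd_nxt and the remembered timestamps are inherited, so
 * PAWS keeps old duplicates from being accepted on the new connection.
 */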
110 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
111 {
112 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
113 	struct tcp_sock *tp = tcp_sk(sk);
114 
115 	/* With PAWS, it is safe from the viewpoint
116 	   of data integrity. Even without PAWS it is safe provided sequence
117 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
118 
119 	   Actually, the idea is close to VJ's, only the timestamp cache is
120 	   held not per host but per port pair, and the TW bucket is used as
121 	   the state holder.
122 
123 	   If the TW bucket has already been destroyed, we fall back to VJ's
124 	   scheme and use the initial timestamp retrieved from the peer table.
125 	 */
126 	if (tcptw->tw_ts_recent_stamp &&
127 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
128 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
129 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
130 		if (tp->write_seq == 0)
131 			tp->write_seq = 1;
132 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
133 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
134 		sock_hold(sktw);
135 		return 1;
136 	}
137 
138 	return 0;
139 }
140 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
141 
142 /* This will initiate an outgoing connection. */
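/* Illustrative call path (assumption: a plain AF_INET/SOCK_STREAM socket):
 *
 *	connect(fd, &sin, sizeof(sin))
 *	  -> inet_stream_connect() -> sk->sk_prot->connect == tcp_v4_connect()
 *
 * i.e. this is the protocol hook behind a user-space connect() on TCP/IPv4.
 */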
143 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
144 {
145 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
146 	struct inet_sock *inet = inet_sk(sk);
147 	struct tcp_sock *tp = tcp_sk(sk);
148 	__be16 orig_sport, orig_dport;
149 	__be32 daddr, nexthop;
150 	struct flowi4 *fl4;
151 	struct rtable *rt;
152 	int err;
153 	struct ip_options_rcu *inet_opt;
154 
155 	if (addr_len < sizeof(struct sockaddr_in))
156 		return -EINVAL;
157 
158 	if (usin->sin_family != AF_INET)
159 		return -EAFNOSUPPORT;
160 
161 	nexthop = daddr = usin->sin_addr.s_addr;
162 	inet_opt = rcu_dereference_protected(inet->inet_opt,
163 					     sock_owned_by_user(sk));
164 	if (inet_opt && inet_opt->opt.srr) {
165 		if (!daddr)
166 			return -EINVAL;
167 		nexthop = inet_opt->opt.faddr;
168 	}
169 
170 	orig_sport = inet->inet_sport;
171 	orig_dport = usin->sin_port;
172 	fl4 = &inet->cork.fl.u.ip4;
173 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
174 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 			      IPPROTO_TCP,
176 			      orig_sport, orig_dport, sk);
177 	if (IS_ERR(rt)) {
178 		err = PTR_ERR(rt);
179 		if (err == -ENETUNREACH)
180 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
181 		return err;
182 	}
183 
184 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185 		ip_rt_put(rt);
186 		return -ENETUNREACH;
187 	}
188 
189 	if (!inet_opt || !inet_opt->opt.srr)
190 		daddr = fl4->daddr;
191 
192 	if (!inet->inet_saddr)
193 		inet->inet_saddr = fl4->saddr;
194 	inet->inet_rcv_saddr = inet->inet_saddr;
195 
196 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
197 		/* Reset inherited state */
198 		tp->rx_opt.ts_recent	   = 0;
199 		tp->rx_opt.ts_recent_stamp = 0;
200 		if (likely(!tp->repair))
201 			tp->write_seq	   = 0;
202 	}
203 
204 	if (tcp_death_row.sysctl_tw_recycle &&
205 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
206 		tcp_fetch_timewait_stamp(sk, &rt->dst);
207 
208 	inet->inet_dport = usin->sin_port;
209 	inet->inet_daddr = daddr;
210 
211 	inet_set_txhash(sk);
212 
213 	inet_csk(sk)->icsk_ext_hdr_len = 0;
214 	if (inet_opt)
215 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
216 
217 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
218 
219 	/* Socket identity is still unknown (sport may be zero).
220 	 * However, we set the state to SYN-SENT and, without releasing the
221 	 * socket lock, select a source port, enter ourselves into the hash
222 	 * tables and complete initialization after this.
223 	 */
224 	tcp_set_state(sk, TCP_SYN_SENT);
225 	err = inet_hash_connect(&tcp_death_row, sk);
226 	if (err)
227 		goto failure;
228 
229 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
230 			       inet->inet_sport, inet->inet_dport, sk);
231 	if (IS_ERR(rt)) {
232 		err = PTR_ERR(rt);
233 		rt = NULL;
234 		goto failure;
235 	}
236 	/* OK, now commit destination to socket.  */
237 	sk->sk_gso_type = SKB_GSO_TCPV4;
238 	sk_setup_caps(sk, &rt->dst);
239 
240 	if (!tp->write_seq && likely(!tp->repair))
241 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
242 							   inet->inet_daddr,
243 							   inet->inet_sport,
244 							   usin->sin_port);
245 
246 	inet->inet_id = tp->write_seq ^ jiffies;
247 
248 	err = tcp_connect(sk);
249 
250 	rt = NULL;
251 	if (err)
252 		goto failure;
253 
254 	return 0;
255 
256 failure:
257 	/*
258 	 * This unhashes the socket and releases the local port,
259 	 * if necessary.
260 	 */
261 	tcp_set_state(sk, TCP_CLOSE);
262 	ip_rt_put(rt);
263 	sk->sk_route_caps = 0;
264 	inet->inet_dport = 0;
265 	return err;
266 }
267 EXPORT_SYMBOL(tcp_v4_connect);
268 
269 /*
270  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
271  * It can be called through tcp_release_cb() if socket was owned by user
272  * at the time tcp_v4_err() was called to handle ICMP message.
273  */
274 static void tcp_v4_mtu_reduced(struct sock *sk)
275 {
276 	struct dst_entry *dst;
277 	struct inet_sock *inet = inet_sk(sk);
278 	u32 mtu = tcp_sk(sk)->mtu_info;
279 
280 	dst = inet_csk_update_pmtu(sk, mtu);
281 	if (!dst)
282 		return;
283 
284 	/* Something is about to go wrong... Remember the soft error
285 	 * in case this connection is not able to recover.
286 	 */
287 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
288 		sk->sk_err_soft = EMSGSIZE;
289 
290 	mtu = dst_mtu(dst);
291 
292 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
293 	    ip_sk_accept_pmtu(sk) &&
294 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
295 		tcp_sync_mss(sk, mtu);
296 
297 		/* Resend the TCP packet because it's
298 		 * clear that the old packet has been
299 		 * dropped. This is the new "fast" path mtu
300 		 * discovery.
301 		 */
302 		tcp_simple_retransmit(sk);
303 	} /* else let the usual retransmit timer handle it */
304 }
305 
306 static void do_redirect(struct sk_buff *skb, struct sock *sk)
307 {
308 	struct dst_entry *dst = __sk_dst_check(sk, 0);
309 
310 	if (dst)
311 		dst->ops->redirect(dst, sk, skb);
312 }
313 
314 /*
315  * This routine is called by the ICMP module when it gets some
316  * sort of error condition.  If err < 0 then the socket should
317  * be closed and the error returned to the user.  If err > 0
318  * it's just the icmp type << 8 | icmp code.  After adjustment
319  * header points to the first 8 bytes of the tcp header.  We need
320  * to find the appropriate port.
321  *
322  * The locking strategy used here is very "optimistic". When
323  * someone else accesses the socket the ICMP is just dropped
324  * and for some paths there is no check at all.
325  * A more general error queue to queue errors for later handling
326  * is probably better.
327  *
328  */
329 
330 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
331 {
332 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
333 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
334 	struct inet_connection_sock *icsk;
335 	struct tcp_sock *tp;
336 	struct inet_sock *inet;
337 	const int type = icmp_hdr(icmp_skb)->type;
338 	const int code = icmp_hdr(icmp_skb)->code;
339 	struct sock *sk;
340 	struct sk_buff *skb;
341 	struct request_sock *fastopen;
342 	__u32 seq, snd_una;
343 	__u32 remaining;
344 	int err;
345 	struct net *net = dev_net(icmp_skb->dev);
346 
347 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
348 			iph->saddr, th->source, inet_iif(icmp_skb));
349 	if (!sk) {
350 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
351 		return;
352 	}
353 	if (sk->sk_state == TCP_TIME_WAIT) {
354 		inet_twsk_put(inet_twsk(sk));
355 		return;
356 	}
357 
358 	bh_lock_sock(sk);
359 	/* If too many ICMPs get dropped on busy
360 	 * servers this needs to be solved differently.
361 	 * We do take care of the PMTU discovery (RFC 1191) special case:
362 	 * we can receive locally generated ICMP messages while the socket is held.
363 	 */
364 	if (sock_owned_by_user(sk)) {
365 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
366 			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
367 	}
368 	if (sk->sk_state == TCP_CLOSE)
369 		goto out;
370 
371 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
372 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
373 		goto out;
374 	}
375 
376 	icsk = inet_csk(sk);
377 	tp = tcp_sk(sk);
378 	seq = ntohl(th->seq);
379 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
380 	fastopen = tp->fastopen_rsk;
381 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
382 	if (sk->sk_state != TCP_LISTEN &&
383 	    !between(seq, snd_una, tp->snd_nxt)) {
384 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
385 		goto out;
386 	}
387 
388 	switch (type) {
389 	case ICMP_REDIRECT:
390 		do_redirect(icmp_skb, sk);
391 		goto out;
392 	case ICMP_SOURCE_QUENCH:
393 		/* Just silently ignore these. */
394 		goto out;
395 	case ICMP_PARAMETERPROB:
396 		err = EPROTO;
397 		break;
398 	case ICMP_DEST_UNREACH:
399 		if (code > NR_ICMP_UNREACH)
400 			goto out;
401 
402 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
403 			/* We are not interested in TCP_LISTEN and open_requests
404 			 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
405 			 * they should go through unfragmented).
406 			 */
407 			if (sk->sk_state == TCP_LISTEN)
408 				goto out;
409 
410 			tp->mtu_info = info;
411 			if (!sock_owned_by_user(sk)) {
412 				tcp_v4_mtu_reduced(sk);
413 			} else {
414 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
415 					sock_hold(sk);
416 			}
417 			goto out;
418 		}
419 
420 		err = icmp_err_convert[code].errno;
421 		/* check if icmp_skb allows revert of backoff
422 		 * (see draft-zimmermann-tcp-lcd) */
423 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
424 			break;
425 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
426 		    !icsk->icsk_backoff || fastopen)
427 			break;
428 
429 		if (sock_owned_by_user(sk))
430 			break;
431 
432 		icsk->icsk_backoff--;
433 		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
434 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
435 		tcp_bound_rto(sk);
436 
437 		skb = tcp_write_queue_head(sk);
438 		BUG_ON(!skb);
439 
440 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
441 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
442 
443 		if (remaining) {
444 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
445 						  remaining, TCP_RTO_MAX);
446 		} else {
447 			/* The RTO revert clocked out the retransmission;
448 			 * retransmit now. */
449 			tcp_retransmit_timer(sk);
450 		}
451 
452 		break;
453 	case ICMP_TIME_EXCEEDED:
454 		err = EHOSTUNREACH;
455 		break;
456 	default:
457 		goto out;
458 	}
459 
460 	switch (sk->sk_state) {
461 		struct request_sock *req, **prev;
462 	case TCP_LISTEN:
463 		if (sock_owned_by_user(sk))
464 			goto out;
465 
466 		req = inet_csk_search_req(sk, &prev, th->dest,
467 					  iph->daddr, iph->saddr);
468 		if (!req)
469 			goto out;
470 
471 		/* ICMPs are not backlogged, hence we cannot get
472 		   an established socket here.
473 		 */
474 		WARN_ON(req->sk);
475 
476 		if (seq != tcp_rsk(req)->snt_isn) {
477 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
478 			goto out;
479 		}
480 
481 		/*
482 		 * Still in SYN_RECV, just remove it silently.
483 		 * There is no good way to pass the error to the newly
484 		 * created socket, and POSIX does not want network
485 		 * errors returned from accept().
486 		 */
487 		inet_csk_reqsk_queue_drop(sk, req, prev);
488 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
489 		goto out;
490 
491 	case TCP_SYN_SENT:
492 	case TCP_SYN_RECV:
493 		/* Only in fast or simultaneous open. If a fast open socket is
494 		 * already accepted, it is treated as a connected one below.
495 		 */
496 		if (fastopen && fastopen->sk == NULL)
497 			break;
498 
499 		if (!sock_owned_by_user(sk)) {
500 			sk->sk_err = err;
501 
502 			sk->sk_error_report(sk);
503 
504 			tcp_done(sk);
505 		} else {
506 			sk->sk_err_soft = err;
507 		}
508 		goto out;
509 	}
510 
511 	/* If we've already connected we will keep trying
512 	 * until we time out, or the user gives up.
513 	 *
514 	 * RFC 1122 4.2.3.9 allows us to consider as hard errors
515 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
516 	 * but it is obsoleted by PMTU discovery).
517 	 *
518 	 * Note that in the modern internet, where routing is unreliable
519 	 * and broken firewalls sit in every dark corner sending random
520 	 * errors ordered by their masters, even these two messages finally
521 	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
522 	 *
523 	 * Now we are in compliance with RFCs.
524 	 *							--ANK (980905)
525 	 */
526 
527 	inet = inet_sk(sk);
528 	if (!sock_owned_by_user(sk) && inet->recverr) {
529 		sk->sk_err = err;
530 		sk->sk_error_report(sk);
531 	} else	{ /* Only an error on timeout */
532 		sk->sk_err_soft = err;
533 	}
534 
535 out:
536 	bh_unlock_sock(sk);
537 	sock_put(sk);
538 }
539 
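/* Fill in the TCP checksum for an outgoing skb.  With CHECKSUM_PARTIAL the
 * hardware (or the GSO path) finishes the job, so only the pseudo-header sum
 * and the offsets it needs are stored here; otherwise the full checksum over
 * header and payload is computed in software.
 */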
540 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
541 {
542 	struct tcphdr *th = tcp_hdr(skb);
543 
544 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
545 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
546 		skb->csum_start = skb_transport_header(skb) - skb->head;
547 		skb->csum_offset = offsetof(struct tcphdr, check);
548 	} else {
549 		th->check = tcp_v4_check(skb->len, saddr, daddr,
550 					 csum_partial(th,
551 						      th->doff << 2,
552 						      skb->csum));
553 	}
554 }
555 
556 /* This routine computes an IPv4 TCP checksum. */
557 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
558 {
559 	const struct inet_sock *inet = inet_sk(sk);
560 
561 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
562 }
563 EXPORT_SYMBOL(tcp_v4_send_check);
564 
565 /*
566  *	This routine will send an RST to the other tcp.
567  *
568  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
569  *		      for the reset?
570  *	Answer: if a packet caused the RST, it is not for a socket
571  *		existing in our system; if it is matched to a socket,
572  *		it is just a duplicate segment or a bug in the other side's TCP.
573  *		So we build the reply based only on the parameters
574  *		that arrived with the segment.
575  *	Exception: precedence violation. We do not implement it in any case.
576  */
577 
578 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
579 {
580 	const struct tcphdr *th = tcp_hdr(skb);
581 	struct {
582 		struct tcphdr th;
583 #ifdef CONFIG_TCP_MD5SIG
584 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
585 #endif
586 	} rep;
587 	struct ip_reply_arg arg;
588 #ifdef CONFIG_TCP_MD5SIG
589 	struct tcp_md5sig_key *key;
590 	const __u8 *hash_location = NULL;
591 	unsigned char newhash[16];
592 	int genhash;
593 	struct sock *sk1 = NULL;
594 #endif
595 	struct net *net;
596 
597 	/* Never send a reset in response to a reset. */
598 	if (th->rst)
599 		return;
600 
601 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
602 		return;
603 
604 	/* Swap the send and the receive. */
605 	memset(&rep, 0, sizeof(rep));
606 	rep.th.dest   = th->source;
607 	rep.th.source = th->dest;
608 	rep.th.doff   = sizeof(struct tcphdr) / 4;
609 	rep.th.rst    = 1;
610 
611 	if (th->ack) {
612 		rep.th.seq = th->ack_seq;
613 	} else {
614 		rep.th.ack = 1;
615 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
616 				       skb->len - (th->doff << 2));
617 	}
618 
619 	memset(&arg, 0, sizeof(arg));
620 	arg.iov[0].iov_base = (unsigned char *)&rep;
621 	arg.iov[0].iov_len  = sizeof(rep.th);
622 
623 #ifdef CONFIG_TCP_MD5SIG
624 	hash_location = tcp_parse_md5sig_option(th);
625 	if (!sk && hash_location) {
626 		/*
627 		 * The active side is lost. Try to find the listening socket
628 		 * through the source port, and then find the md5 key through
629 		 * the listening socket. We do not lose any security here:
630 		 * the incoming packet is checked against the md5 hash of the
631 		 * key we find; no RST is generated if the hash doesn't match.
632 		 */
633 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
634 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
635 					     th->source, ip_hdr(skb)->daddr,
636 					     ntohs(th->source), inet_iif(skb));
637 		/* don't send an RST if we can't find a key */
638 		if (!sk1)
639 			return;
640 		rcu_read_lock();
641 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
642 					&ip_hdr(skb)->saddr, AF_INET);
643 		if (!key)
644 			goto release_sk1;
645 
646 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
647 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
648 			goto release_sk1;
649 	} else {
650 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
651 					     &ip_hdr(skb)->saddr,
652 					     AF_INET) : NULL;
653 	}
654 
655 	if (key) {
656 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
657 				   (TCPOPT_NOP << 16) |
658 				   (TCPOPT_MD5SIG << 8) |
659 				   TCPOLEN_MD5SIG);
660 		/* Update length and the length the header thinks exists */
661 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
662 		rep.th.doff = arg.iov[0].iov_len / 4;
663 
664 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
665 				     key, ip_hdr(skb)->saddr,
666 				     ip_hdr(skb)->daddr, &rep.th);
667 	}
668 #endif
669 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
670 				      ip_hdr(skb)->saddr, /* XXX */
671 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
672 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
673 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
674 	/* When the socket is gone, all binding information is lost and
675 	 * routing might fail in this case. No choice here: if we chose to force
676 	 * the input interface, we would misroute in the case of an asymmetric route.
677 	 */
678 	if (sk)
679 		arg.bound_dev_if = sk->sk_bound_dev_if;
680 
681 	net = dev_net(skb_dst(skb)->dev);
682 	arg.tos = ip_hdr(skb)->tos;
683 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
684 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
685 
686 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
687 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
688 
689 #ifdef CONFIG_TCP_MD5SIG
690 release_sk1:
691 	if (sk1) {
692 		rcu_read_unlock();
693 		sock_put(sk1);
694 	}
695 #endif
696 }
697 
698 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
699    outside of socket context, is certainly ugly. What can I do?
700  */
701 
702 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
703 			    u32 win, u32 tsval, u32 tsecr, int oif,
704 			    struct tcp_md5sig_key *key,
705 			    int reply_flags, u8 tos)
706 {
707 	const struct tcphdr *th = tcp_hdr(skb);
708 	struct {
709 		struct tcphdr th;
710 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
711 #ifdef CONFIG_TCP_MD5SIG
712 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
713 #endif
714 			];
715 	} rep;
716 	struct ip_reply_arg arg;
717 	struct net *net = dev_net(skb_dst(skb)->dev);
718 
719 	memset(&rep.th, 0, sizeof(struct tcphdr));
720 	memset(&arg, 0, sizeof(arg));
721 
722 	arg.iov[0].iov_base = (unsigned char *)&rep;
723 	arg.iov[0].iov_len  = sizeof(rep.th);
724 	if (tsecr) {
725 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
726 				   (TCPOPT_TIMESTAMP << 8) |
727 				   TCPOLEN_TIMESTAMP);
728 		rep.opt[1] = htonl(tsval);
729 		rep.opt[2] = htonl(tsecr);
730 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
731 	}
732 
733 	/* Swap the send and the receive. */
734 	rep.th.dest    = th->source;
735 	rep.th.source  = th->dest;
736 	rep.th.doff    = arg.iov[0].iov_len / 4;
737 	rep.th.seq     = htonl(seq);
738 	rep.th.ack_seq = htonl(ack);
739 	rep.th.ack     = 1;
740 	rep.th.window  = htons(win);
741 
742 #ifdef CONFIG_TCP_MD5SIG
743 	if (key) {
744 		int offset = (tsecr) ? 3 : 0;
745 
746 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
747 					  (TCPOPT_NOP << 16) |
748 					  (TCPOPT_MD5SIG << 8) |
749 					  TCPOLEN_MD5SIG);
750 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
751 		rep.th.doff = arg.iov[0].iov_len/4;
752 
753 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
754 				    key, ip_hdr(skb)->saddr,
755 				    ip_hdr(skb)->daddr, &rep.th);
756 	}
757 #endif
758 	arg.flags = reply_flags;
759 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
760 				      ip_hdr(skb)->saddr, /* XXX */
761 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
762 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
763 	if (oif)
764 		arg.bound_dev_if = oif;
765 	arg.tos = tos;
766 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
767 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
768 
769 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
770 }
771 
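/* ACK a segment that landed on a TIME-WAIT socket, echoing the sequence,
 * window and timestamp state remembered in the tw bucket.  Called from
 * tcp_v4_rcv() for the TCP_TW_ACK (and fall-through TCP_TW_SYN) cases.
 */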
772 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
773 {
774 	struct inet_timewait_sock *tw = inet_twsk(sk);
775 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
776 
777 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
778 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
779 			tcp_time_stamp + tcptw->tw_ts_offset,
780 			tcptw->tw_ts_recent,
781 			tw->tw_bound_dev_if,
782 			tcp_twsk_md5_key(tcptw),
783 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
784 			tw->tw_tos
785 			);
786 
787 	inet_twsk_put(tw);
788 }
789 
790 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
791 				  struct request_sock *req)
792 {
793 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
794 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
795 	 */
796 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
797 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
798 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
799 			tcp_time_stamp,
800 			req->ts_recent,
801 			0,
802 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
803 					  AF_INET),
804 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
805 			ip_hdr(skb)->tos);
806 }
807 
808 /*
809  *	Send a SYN-ACK after having received a SYN.
810  *	This still operates on a request_sock only, not on a big
811  *	socket.
812  */
813 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
814 			      struct flowi *fl,
815 			      struct request_sock *req,
816 			      u16 queue_mapping,
817 			      struct tcp_fastopen_cookie *foc)
818 {
819 	const struct inet_request_sock *ireq = inet_rsk(req);
820 	struct flowi4 fl4;
821 	int err = -1;
822 	struct sk_buff *skb;
823 
824 	/* First, grab a route. */
825 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
826 		return -1;
827 
828 	skb = tcp_make_synack(sk, dst, req, foc);
829 
830 	if (skb) {
831 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
832 
833 		skb_set_queue_mapping(skb, queue_mapping);
834 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
835 					    ireq->ir_rmt_addr,
836 					    ireq->opt);
837 		err = net_xmit_eval(err);
838 	}
839 
840 	return err;
841 }
842 
843 /*
844  *	IPv4 request_sock destructor.
845  */
846 static void tcp_v4_reqsk_destructor(struct request_sock *req)
847 {
848 	kfree(inet_rsk(req)->opt);
849 }
850 
851 /*
852  * Return true if a syncookie should be sent
853  */
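/* Note: the one-shot "Possible SYN flooding" warning below is suppressed when
 * sysctl_tcp_syncookies == 2, the mode in which cookies are sent
 * unconditionally rather than only under listen-queue pressure.
 */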
854 bool tcp_syn_flood_action(struct sock *sk,
855 			 const struct sk_buff *skb,
856 			 const char *proto)
857 {
858 	const char *msg = "Dropping request";
859 	bool want_cookie = false;
860 	struct listen_sock *lopt;
861 
862 #ifdef CONFIG_SYN_COOKIES
863 	if (sysctl_tcp_syncookies) {
864 		msg = "Sending cookies";
865 		want_cookie = true;
866 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
867 	} else
868 #endif
869 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
870 
871 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
872 	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
873 		lopt->synflood_warned = 1;
874 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
875 			proto, ntohs(tcp_hdr(skb)->dest), msg);
876 	}
877 	return want_cookie;
878 }
879 EXPORT_SYMBOL(tcp_syn_flood_action);
880 
881 /*
882  * Save and compile IPv4 options into the request_sock if needed.
883  */
884 static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
885 {
886 	const struct ip_options *opt = &(IPCB(skb)->opt);
887 	struct ip_options_rcu *dopt = NULL;
888 
889 	if (opt && opt->optlen) {
890 		int opt_size = sizeof(*dopt) + opt->optlen;
891 
892 		dopt = kmalloc(opt_size, GFP_ATOMIC);
893 		if (dopt) {
894 			if (ip_options_echo(&dopt->opt, skb)) {
895 				kfree(dopt);
896 				dopt = NULL;
897 			}
898 		}
899 	}
900 	return dopt;
901 }
902 
903 #ifdef CONFIG_TCP_MD5SIG
904 /*
905  * RFC2385 MD5 checksumming requires a mapping of
906  * IP address->MD5 Key.
907  * We need to maintain these in the sk structure.
908  */
909 
910 /* Find the Key structure for an address.  */
911 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
912 					 const union tcp_md5_addr *addr,
913 					 int family)
914 {
915 	struct tcp_sock *tp = tcp_sk(sk);
916 	struct tcp_md5sig_key *key;
917 	unsigned int size = sizeof(struct in_addr);
918 	struct tcp_md5sig_info *md5sig;
919 
920 	/* caller either holds rcu_read_lock() or socket lock */
921 	md5sig = rcu_dereference_check(tp->md5sig_info,
922 				       sock_owned_by_user(sk) ||
923 				       lockdep_is_held(&sk->sk_lock.slock));
924 	if (!md5sig)
925 		return NULL;
926 #if IS_ENABLED(CONFIG_IPV6)
927 	if (family == AF_INET6)
928 		size = sizeof(struct in6_addr);
929 #endif
930 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
931 		if (key->family != family)
932 			continue;
933 		if (!memcmp(&key->addr, addr, size))
934 			return key;
935 	}
936 	return NULL;
937 }
938 EXPORT_SYMBOL(tcp_md5_do_lookup);
939 
940 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
941 					 struct sock *addr_sk)
942 {
943 	union tcp_md5_addr *addr;
944 
945 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
946 	return tcp_md5_do_lookup(sk, addr, AF_INET);
947 }
948 EXPORT_SYMBOL(tcp_v4_md5_lookup);
949 
950 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
951 						      struct request_sock *req)
952 {
953 	union tcp_md5_addr *addr;
954 
955 	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
956 	return tcp_md5_do_lookup(sk, addr, AF_INET);
957 }
958 
959 /* This can be called on a newly created socket, from other files */
960 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
961 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
962 {
963 	/* Add Key to the list */
964 	struct tcp_md5sig_key *key;
965 	struct tcp_sock *tp = tcp_sk(sk);
966 	struct tcp_md5sig_info *md5sig;
967 
968 	key = tcp_md5_do_lookup(sk, addr, family);
969 	if (key) {
970 		/* Pre-existing entry - just update that one. */
971 		memcpy(key->key, newkey, newkeylen);
972 		key->keylen = newkeylen;
973 		return 0;
974 	}
975 
976 	md5sig = rcu_dereference_protected(tp->md5sig_info,
977 					   sock_owned_by_user(sk));
978 	if (!md5sig) {
979 		md5sig = kmalloc(sizeof(*md5sig), gfp);
980 		if (!md5sig)
981 			return -ENOMEM;
982 
983 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
984 		INIT_HLIST_HEAD(&md5sig->head);
985 		rcu_assign_pointer(tp->md5sig_info, md5sig);
986 	}
987 
988 	key = sock_kmalloc(sk, sizeof(*key), gfp);
989 	if (!key)
990 		return -ENOMEM;
991 	if (!tcp_alloc_md5sig_pool()) {
992 		sock_kfree_s(sk, key, sizeof(*key));
993 		return -ENOMEM;
994 	}
995 
996 	memcpy(key->key, newkey, newkeylen);
997 	key->keylen = newkeylen;
998 	key->family = family;
999 	memcpy(&key->addr, addr,
1000 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1001 				      sizeof(struct in_addr));
1002 	hlist_add_head_rcu(&key->node, &md5sig->head);
1003 	return 0;
1004 }
1005 EXPORT_SYMBOL(tcp_md5_do_add);
1006 
1007 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1008 {
1009 	struct tcp_md5sig_key *key;
1010 
1011 	key = tcp_md5_do_lookup(sk, addr, family);
1012 	if (!key)
1013 		return -ENOENT;
1014 	hlist_del_rcu(&key->node);
1015 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1016 	kfree_rcu(key, rcu);
1017 	return 0;
1018 }
1019 EXPORT_SYMBOL(tcp_md5_do_del);
1020 
1021 static void tcp_clear_md5_list(struct sock *sk)
1022 {
1023 	struct tcp_sock *tp = tcp_sk(sk);
1024 	struct tcp_md5sig_key *key;
1025 	struct hlist_node *n;
1026 	struct tcp_md5sig_info *md5sig;
1027 
1028 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1029 
1030 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1031 		hlist_del_rcu(&key->node);
1032 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1033 		kfree_rcu(key, rcu);
1034 	}
1035 }
1036 
1037 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1038 				 int optlen)
1039 {
1040 	struct tcp_md5sig cmd;
1041 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1042 
1043 	if (optlen < sizeof(cmd))
1044 		return -EINVAL;
1045 
1046 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1047 		return -EFAULT;
1048 
1049 	if (sin->sin_family != AF_INET)
1050 		return -EINVAL;
1051 
1052 	if (!cmd.tcpm_keylen)
1053 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1054 				      AF_INET);
1055 
1056 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1057 		return -EINVAL;
1058 
1059 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1060 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1061 			      GFP_KERNEL);
1062 }
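
/* Illustrative sketch (not part of this file): the keys parsed above are
 * normally installed from user space with the TCP_MD5SIG socket option,
 * roughly:
 *
 *	struct tcp_md5sig md5;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	memcpy(&md5.tcpm_addr, &peer_sin, sizeof(peer_sin));
 *	md5.tcpm_keylen = keylen;
 *	memcpy(md5.tcpm_key, key, keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * where peer_sin, key and keylen are hypothetical names for the caller's
 * peer address and shared secret.
 */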
1063 
1064 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1065 					__be32 daddr, __be32 saddr, int nbytes)
1066 {
1067 	struct tcp4_pseudohdr *bp;
1068 	struct scatterlist sg;
1069 
1070 	bp = &hp->md5_blk.ip4;
1071 
1072 	/*
1073 	 * 1. the TCP pseudo-header (in the order: source IP address,
1074 	 * destination IP address, zero-padded protocol number, and
1075 	 * segment length)
1076 	 */
1077 	bp->saddr = saddr;
1078 	bp->daddr = daddr;
1079 	bp->pad = 0;
1080 	bp->protocol = IPPROTO_TCP;
1081 	bp->len = cpu_to_be16(nbytes);
1082 
1083 	sg_init_one(&sg, bp, sizeof(*bp));
1084 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1085 }
1086 
1087 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1088 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1089 {
1090 	struct tcp_md5sig_pool *hp;
1091 	struct hash_desc *desc;
1092 
1093 	hp = tcp_get_md5sig_pool();
1094 	if (!hp)
1095 		goto clear_hash_noput;
1096 	desc = &hp->md5_desc;
1097 
1098 	if (crypto_hash_init(desc))
1099 		goto clear_hash;
1100 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1101 		goto clear_hash;
1102 	if (tcp_md5_hash_header(hp, th))
1103 		goto clear_hash;
1104 	if (tcp_md5_hash_key(hp, key))
1105 		goto clear_hash;
1106 	if (crypto_hash_final(desc, md5_hash))
1107 		goto clear_hash;
1108 
1109 	tcp_put_md5sig_pool();
1110 	return 0;
1111 
1112 clear_hash:
1113 	tcp_put_md5sig_pool();
1114 clear_hash_noput:
1115 	memset(md5_hash, 0, 16);
1116 	return 1;
1117 }
1118 
1119 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1120 			const struct sock *sk, const struct request_sock *req,
1121 			const struct sk_buff *skb)
1122 {
1123 	struct tcp_md5sig_pool *hp;
1124 	struct hash_desc *desc;
1125 	const struct tcphdr *th = tcp_hdr(skb);
1126 	__be32 saddr, daddr;
1127 
1128 	if (sk) {
1129 		saddr = inet_sk(sk)->inet_saddr;
1130 		daddr = inet_sk(sk)->inet_daddr;
1131 	} else if (req) {
1132 		saddr = inet_rsk(req)->ir_loc_addr;
1133 		daddr = inet_rsk(req)->ir_rmt_addr;
1134 	} else {
1135 		const struct iphdr *iph = ip_hdr(skb);
1136 		saddr = iph->saddr;
1137 		daddr = iph->daddr;
1138 	}
1139 
1140 	hp = tcp_get_md5sig_pool();
1141 	if (!hp)
1142 		goto clear_hash_noput;
1143 	desc = &hp->md5_desc;
1144 
1145 	if (crypto_hash_init(desc))
1146 		goto clear_hash;
1147 
1148 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1149 		goto clear_hash;
1150 	if (tcp_md5_hash_header(hp, th))
1151 		goto clear_hash;
1152 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1153 		goto clear_hash;
1154 	if (tcp_md5_hash_key(hp, key))
1155 		goto clear_hash;
1156 	if (crypto_hash_final(desc, md5_hash))
1157 		goto clear_hash;
1158 
1159 	tcp_put_md5sig_pool();
1160 	return 0;
1161 
1162 clear_hash:
1163 	tcp_put_md5sig_pool();
1164 clear_hash_noput:
1165 	memset(md5_hash, 0, 16);
1166 	return 1;
1167 }
1168 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1169 
1170 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1171 {
1172 	/*
1173 	 * This gets called for each TCP segment that arrives
1174 	 * so we want to be efficient.
1175 	 * We have 3 drop cases:
1176 	 * o No MD5 hash and one expected.
1177 	 * o MD5 hash and we're not expecting one.
1178 	 * o MD5 hash and it's wrong.
1179 	 */
1180 	const __u8 *hash_location = NULL;
1181 	struct tcp_md5sig_key *hash_expected;
1182 	const struct iphdr *iph = ip_hdr(skb);
1183 	const struct tcphdr *th = tcp_hdr(skb);
1184 	int genhash;
1185 	unsigned char newhash[16];
1186 
1187 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1188 					  AF_INET);
1189 	hash_location = tcp_parse_md5sig_option(th);
1190 
1191 	/* We've parsed the options - do we have a hash? */
1192 	if (!hash_expected && !hash_location)
1193 		return false;
1194 
1195 	if (hash_expected && !hash_location) {
1196 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1197 		return true;
1198 	}
1199 
1200 	if (!hash_expected && hash_location) {
1201 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1202 		return true;
1203 	}
1204 
1205 	/* Okay, we have both hash_expected and hash_location,
1206 	 * so we need to calculate the checksum.
1207 	 */
1208 	genhash = tcp_v4_md5_hash_skb(newhash,
1209 				      hash_expected,
1210 				      NULL, NULL, skb);
1211 
1212 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1213 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1214 				     &iph->saddr, ntohs(th->source),
1215 				     &iph->daddr, ntohs(th->dest),
1216 				     genhash ? " tcp_v4_calc_md5_hash failed"
1217 				     : "");
1218 		return true;
1219 	}
1220 	return false;
1221 }
1222 
1223 #endif
1224 
1225 static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1226 			    struct sk_buff *skb)
1227 {
1228 	struct inet_request_sock *ireq = inet_rsk(req);
1229 
1230 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1231 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1232 	ireq->no_srccheck = inet_sk(sk)->transparent;
1233 	ireq->opt = tcp_v4_save_options(skb);
1234 }
1235 
1236 static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1237 					  const struct request_sock *req,
1238 					  bool *strict)
1239 {
1240 	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1241 
1242 	if (strict) {
1243 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1244 			*strict = true;
1245 		else
1246 			*strict = false;
1247 	}
1248 
1249 	return dst;
1250 }
1251 
1252 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1253 	.family		=	PF_INET,
1254 	.obj_size	=	sizeof(struct tcp_request_sock),
1255 	.rtx_syn_ack	=	tcp_rtx_synack,
1256 	.send_ack	=	tcp_v4_reqsk_send_ack,
1257 	.destructor	=	tcp_v4_reqsk_destructor,
1258 	.send_reset	=	tcp_v4_send_reset,
1259 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1260 };
1261 
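/* Address-family specific request_sock operations for IPv4.  These hooks
 * (ISN generation, route lookup, SYN-ACK transmission, optional MD5 and
 * syncookie helpers) are what tcp_conn_request() uses when called from
 * tcp_v4_conn_request() below.
 */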
1262 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1263 	.mss_clamp	=	TCP_MSS_DEFAULT,
1264 #ifdef CONFIG_TCP_MD5SIG
1265 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1266 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1267 #endif
1268 	.init_req	=	tcp_v4_init_req,
1269 #ifdef CONFIG_SYN_COOKIES
1270 	.cookie_init_seq =	cookie_v4_init_sequence,
1271 #endif
1272 	.route_req	=	tcp_v4_route_req,
1273 	.init_seq	=	tcp_v4_init_sequence,
1274 	.send_synack	=	tcp_v4_send_synack,
1275 	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
1276 };
1277 
1278 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1279 {
1280 	/* Never answer SYNs sent to broadcast or multicast */
1281 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1282 		goto drop;
1283 
1284 	return tcp_conn_request(&tcp_request_sock_ops,
1285 				&tcp_request_sock_ipv4_ops, sk, skb);
1286 
1287 drop:
1288 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1289 	return 0;
1290 }
1291 EXPORT_SYMBOL(tcp_v4_conn_request);
1292 
1293 
1294 /*
1295  * The three way handshake has completed - we got a valid synack -
1296  * now create the new socket.
1297  */
1298 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1299 				  struct request_sock *req,
1300 				  struct dst_entry *dst)
1301 {
1302 	struct inet_request_sock *ireq;
1303 	struct inet_sock *newinet;
1304 	struct tcp_sock *newtp;
1305 	struct sock *newsk;
1306 #ifdef CONFIG_TCP_MD5SIG
1307 	struct tcp_md5sig_key *key;
1308 #endif
1309 	struct ip_options_rcu *inet_opt;
1310 
1311 	if (sk_acceptq_is_full(sk))
1312 		goto exit_overflow;
1313 
1314 	newsk = tcp_create_openreq_child(sk, req, skb);
1315 	if (!newsk)
1316 		goto exit_nonewsk;
1317 
1318 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1319 	inet_sk_rx_dst_set(newsk, skb);
1320 
1321 	newtp		      = tcp_sk(newsk);
1322 	newinet		      = inet_sk(newsk);
1323 	ireq		      = inet_rsk(req);
1324 	newinet->inet_daddr   = ireq->ir_rmt_addr;
1325 	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1326 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1327 	inet_opt	      = ireq->opt;
1328 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1329 	ireq->opt	      = NULL;
1330 	newinet->mc_index     = inet_iif(skb);
1331 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1332 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1333 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1334 	inet_set_txhash(newsk);
1335 	if (inet_opt)
1336 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1337 	newinet->inet_id = newtp->write_seq ^ jiffies;
1338 
1339 	if (!dst) {
1340 		dst = inet_csk_route_child_sock(sk, newsk, req);
1341 		if (!dst)
1342 			goto put_and_exit;
1343 	} else {
1344 		/* syncookie case : see end of cookie_v4_check() */
1345 	}
1346 	sk_setup_caps(newsk, dst);
1347 
1348 	tcp_sync_mss(newsk, dst_mtu(dst));
1349 	newtp->advmss = dst_metric_advmss(dst);
1350 	if (tcp_sk(sk)->rx_opt.user_mss &&
1351 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1352 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1353 
1354 	tcp_initialize_rcv_mss(newsk);
1355 
1356 #ifdef CONFIG_TCP_MD5SIG
1357 	/* Copy over the MD5 key from the original socket */
1358 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1359 				AF_INET);
1360 	if (key != NULL) {
1361 		/*
1362 		 * We're using one, so create a matching key
1363 		 * on the newsk structure. If we fail to get
1364 		 * memory, then we end up not copying the key
1365 		 * across. Shucks.
1366 		 */
1367 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1368 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1369 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1370 	}
1371 #endif
1372 
1373 	if (__inet_inherit_port(sk, newsk) < 0)
1374 		goto put_and_exit;
1375 	__inet_hash_nolisten(newsk, NULL);
1376 
1377 	return newsk;
1378 
1379 exit_overflow:
1380 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1381 exit_nonewsk:
1382 	dst_release(dst);
1383 exit:
1384 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1385 	return NULL;
1386 put_and_exit:
1387 	inet_csk_prepare_forced_close(newsk);
1388 	tcp_done(newsk);
1389 	goto exit;
1390 }
1391 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1392 
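/* Figure out what an incoming segment on a listening socket belongs to:
 * first a pending connection request in the SYN table (completed via
 * tcp_check_req()), then an already established child socket, and finally,
 * with syncookies enabled, a non-SYN segment may be validated as a cookie ACK.
 */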
1393 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1394 {
1395 	struct tcphdr *th = tcp_hdr(skb);
1396 	const struct iphdr *iph = ip_hdr(skb);
1397 	struct sock *nsk;
1398 	struct request_sock **prev;
1399 	/* Find possible connection requests. */
1400 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1401 						       iph->saddr, iph->daddr);
1402 	if (req)
1403 		return tcp_check_req(sk, skb, req, prev, false);
1404 
1405 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1406 			th->source, iph->daddr, th->dest, inet_iif(skb));
1407 
1408 	if (nsk) {
1409 		if (nsk->sk_state != TCP_TIME_WAIT) {
1410 			bh_lock_sock(nsk);
1411 			return nsk;
1412 		}
1413 		inet_twsk_put(inet_twsk(nsk));
1414 		return NULL;
1415 	}
1416 
1417 #ifdef CONFIG_SYN_COOKIES
1418 	if (!th->syn)
1419 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1420 #endif
1421 	return sk;
1422 }
1423 
1424 /* The socket must have its spinlock held when we get
1425  * here.
1426  *
1427  * We have a potential double-lock case here, so even when
1428  * doing backlog processing we use the BH locking scheme.
1429  * This is because we cannot sleep with the original spinlock
1430  * held.
1431  */
1432 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1433 {
1434 	struct sock *rsk;
1435 #ifdef CONFIG_TCP_MD5SIG
1436 	/*
1437 	 * We really want to reject the packet as early as possible
1438 	 * if:
1439 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1440 	 *  o There is an MD5 option and we're not expecting one
1441 	 */
1442 	if (tcp_v4_inbound_md5_hash(sk, skb))
1443 		goto discard;
1444 #endif
1445 
1446 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1447 		struct dst_entry *dst = sk->sk_rx_dst;
1448 
1449 		sock_rps_save_rxhash(sk, skb);
1450 		if (dst) {
1451 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1452 			    dst->ops->check(dst, 0) == NULL) {
1453 				dst_release(dst);
1454 				sk->sk_rx_dst = NULL;
1455 			}
1456 		}
1457 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1458 		return 0;
1459 	}
1460 
1461 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1462 		goto csum_err;
1463 
1464 	if (sk->sk_state == TCP_LISTEN) {
1465 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1466 		if (!nsk)
1467 			goto discard;
1468 
1469 		if (nsk != sk) {
1470 			sock_rps_save_rxhash(nsk, skb);
1471 			if (tcp_child_process(sk, nsk, skb)) {
1472 				rsk = nsk;
1473 				goto reset;
1474 			}
1475 			return 0;
1476 		}
1477 	} else
1478 		sock_rps_save_rxhash(sk, skb);
1479 
1480 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1481 		rsk = sk;
1482 		goto reset;
1483 	}
1484 	return 0;
1485 
1486 reset:
1487 	tcp_v4_send_reset(rsk, skb);
1488 discard:
1489 	kfree_skb(skb);
1490 	/* Be careful here. If this function gets more complicated and
1491 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1492 	 * might be destroyed here. This current version compiles correctly,
1493 	 * but you have been warned.
1494 	 */
1495 	return 0;
1496 
1497 csum_err:
1498 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1499 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1500 	goto discard;
1501 }
1502 EXPORT_SYMBOL(tcp_v4_do_rcv);
1503 
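/* Early demux: look up the established socket while the packet is still in
 * the IP receive path, so the destination cache entry stored on the socket
 * can be reused and a per-packet route lookup avoided.
 */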
1504 void tcp_v4_early_demux(struct sk_buff *skb)
1505 {
1506 	const struct iphdr *iph;
1507 	const struct tcphdr *th;
1508 	struct sock *sk;
1509 
1510 	if (skb->pkt_type != PACKET_HOST)
1511 		return;
1512 
1513 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1514 		return;
1515 
1516 	iph = ip_hdr(skb);
1517 	th = tcp_hdr(skb);
1518 
1519 	if (th->doff < sizeof(struct tcphdr) / 4)
1520 		return;
1521 
1522 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1523 				       iph->saddr, th->source,
1524 				       iph->daddr, ntohs(th->dest),
1525 				       skb->skb_iif);
1526 	if (sk) {
1527 		skb->sk = sk;
1528 		skb->destructor = sock_edemux;
1529 		if (sk->sk_state != TCP_TIME_WAIT) {
1530 			struct dst_entry *dst = sk->sk_rx_dst;
1531 
1532 			if (dst)
1533 				dst = dst_check(dst, 0);
1534 			if (dst &&
1535 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1536 				skb_dst_set_noref(skb, dst);
1537 		}
1538 	}
1539 }
1540 
1541 /* Packet is added to VJ-style prequeue for processing in process
1542  * context, if a reader task is waiting. Apparently, this exciting
1543  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1544  * failed somewhere. Latency? Burstiness? Well, at least now we will
1545  * see, why it failed. 8)8)				  --ANK
1546  *
1547  */
1548 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1549 {
1550 	struct tcp_sock *tp = tcp_sk(sk);
1551 
1552 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
1553 		return false;
1554 
1555 	if (skb->len <= tcp_hdrlen(skb) &&
1556 	    skb_queue_len(&tp->ucopy.prequeue) == 0)
1557 		return false;
1558 
1559 	skb_dst_force(skb);
1560 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
1561 	tp->ucopy.memory += skb->truesize;
1562 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
1563 		struct sk_buff *skb1;
1564 
1565 		BUG_ON(sock_owned_by_user(sk));
1566 
1567 		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1568 			sk_backlog_rcv(sk, skb1);
1569 			NET_INC_STATS_BH(sock_net(sk),
1570 					 LINUX_MIB_TCPPREQUEUEDROPPED);
1571 		}
1572 
1573 		tp->ucopy.memory = 0;
1574 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1575 		wake_up_interruptible_sync_poll(sk_sleep(sk),
1576 					   POLLIN | POLLRDNORM | POLLRDBAND);
1577 		if (!inet_csk_ack_scheduled(sk))
1578 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1579 						  (3 * tcp_rto_min(sk)) / 4,
1580 						  TCP_RTO_MAX);
1581 	}
1582 	return true;
1583 }
1584 EXPORT_SYMBOL(tcp_prequeue);
1585 
1586 /*
1587  *	From tcp_input.c
1588  */
1589 
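/* Main receive entry point for TCP over IPv4 (registered as the IPPROTO_TCP
 * handler elsewhere).  It validates the header and checksum, fills in the TCP
 * control block, looks up the owning socket, and then either processes the
 * segment directly, prequeues it, or backlogs it while the socket is owned
 * by user context.
 */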
1590 int tcp_v4_rcv(struct sk_buff *skb)
1591 {
1592 	const struct iphdr *iph;
1593 	const struct tcphdr *th;
1594 	struct sock *sk;
1595 	int ret;
1596 	struct net *net = dev_net(skb->dev);
1597 
1598 	if (skb->pkt_type != PACKET_HOST)
1599 		goto discard_it;
1600 
1601 	/* Count it even if it's bad */
1602 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1603 
1604 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1605 		goto discard_it;
1606 
1607 	th = tcp_hdr(skb);
1608 
1609 	if (th->doff < sizeof(struct tcphdr) / 4)
1610 		goto bad_packet;
1611 	if (!pskb_may_pull(skb, th->doff * 4))
1612 		goto discard_it;
1613 
1614 	/* An explanation is required here, I think.
1615 	 * Packet length and doff are validated by header prediction,
1616 	 * provided the case of th->doff == 0 is eliminated.
1617 	 * So, we defer the checks. */
1618 
1619 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1620 		goto csum_error;
1621 
1622 	th = tcp_hdr(skb);
1623 	iph = ip_hdr(skb);
1624 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1625 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1626 				    skb->len - th->doff * 4);
1627 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1628 	TCP_SKB_CB(skb)->when	 = 0;
1629 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1630 	TCP_SKB_CB(skb)->sacked	 = 0;
1631 
1632 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1633 	if (!sk)
1634 		goto no_tcp_socket;
1635 
1636 process:
1637 	if (sk->sk_state == TCP_TIME_WAIT)
1638 		goto do_time_wait;
1639 
1640 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1641 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1642 		goto discard_and_relse;
1643 	}
1644 
1645 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1646 		goto discard_and_relse;
1647 	nf_reset(skb);
1648 
1649 	if (sk_filter(sk, skb))
1650 		goto discard_and_relse;
1651 
1652 	sk_mark_napi_id(sk, skb);
1653 	skb->dev = NULL;
1654 
1655 	bh_lock_sock_nested(sk);
1656 	ret = 0;
1657 	if (!sock_owned_by_user(sk)) {
1658 #ifdef CONFIG_NET_DMA
1659 		struct tcp_sock *tp = tcp_sk(sk);
1660 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1661 			tp->ucopy.dma_chan = net_dma_find_channel();
1662 		if (tp->ucopy.dma_chan)
1663 			ret = tcp_v4_do_rcv(sk, skb);
1664 		else
1665 #endif
1666 		{
1667 			if (!tcp_prequeue(sk, skb))
1668 				ret = tcp_v4_do_rcv(sk, skb);
1669 		}
1670 	} else if (unlikely(sk_add_backlog(sk, skb,
1671 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1672 		bh_unlock_sock(sk);
1673 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1674 		goto discard_and_relse;
1675 	}
1676 	bh_unlock_sock(sk);
1677 
1678 	sock_put(sk);
1679 
1680 	return ret;
1681 
1682 no_tcp_socket:
1683 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1684 		goto discard_it;
1685 
1686 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1687 csum_error:
1688 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1689 bad_packet:
1690 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1691 	} else {
1692 		tcp_v4_send_reset(NULL, skb);
1693 	}
1694 
1695 discard_it:
1696 	/* Discard frame. */
1697 	kfree_skb(skb);
1698 	return 0;
1699 
1700 discard_and_relse:
1701 	sock_put(sk);
1702 	goto discard_it;
1703 
1704 do_time_wait:
1705 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1706 		inet_twsk_put(inet_twsk(sk));
1707 		goto discard_it;
1708 	}
1709 
1710 	if (skb->len < (th->doff << 2)) {
1711 		inet_twsk_put(inet_twsk(sk));
1712 		goto bad_packet;
1713 	}
1714 	if (tcp_checksum_complete(skb)) {
1715 		inet_twsk_put(inet_twsk(sk));
1716 		goto csum_error;
1717 	}
1718 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1719 	case TCP_TW_SYN: {
1720 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1721 							&tcp_hashinfo,
1722 							iph->saddr, th->source,
1723 							iph->daddr, th->dest,
1724 							inet_iif(skb));
1725 		if (sk2) {
1726 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1727 			inet_twsk_put(inet_twsk(sk));
1728 			sk = sk2;
1729 			goto process;
1730 		}
1731 		/* Fall through to ACK */
1732 	}
1733 	case TCP_TW_ACK:
1734 		tcp_v4_timewait_ack(sk, skb);
1735 		break;
1736 	case TCP_TW_RST:
1737 		goto no_tcp_socket;
1738 	case TCP_TW_SUCCESS:;
1739 	}
1740 	goto discard_it;
1741 }
1742 
1743 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1744 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1745 	.twsk_unique	= tcp_twsk_unique,
1746 	.twsk_destructor= tcp_twsk_destructor,
1747 };
1748 
1749 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1750 {
1751 	struct dst_entry *dst = skb_dst(skb);
1752 
1753 	dst_hold(dst);
1754 	sk->sk_rx_dst = dst;
1755 	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1756 }
1757 EXPORT_SYMBOL(inet_sk_rx_dst_set);
1758 
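/* Address-family specific connection ops for TCP over IPv4, installed as
 * icsk_af_ops in tcp_v4_init_sock() below; the IPv6 code presumably reuses
 * this table for v4-mapped peers.
 */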
1759 const struct inet_connection_sock_af_ops ipv4_specific = {
1760 	.queue_xmit	   = ip_queue_xmit,
1761 	.send_check	   = tcp_v4_send_check,
1762 	.rebuild_header	   = inet_sk_rebuild_header,
1763 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1764 	.conn_request	   = tcp_v4_conn_request,
1765 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1766 	.net_header_len	   = sizeof(struct iphdr),
1767 	.setsockopt	   = ip_setsockopt,
1768 	.getsockopt	   = ip_getsockopt,
1769 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1770 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1771 	.bind_conflict	   = inet_csk_bind_conflict,
1772 #ifdef CONFIG_COMPAT
1773 	.compat_setsockopt = compat_ip_setsockopt,
1774 	.compat_getsockopt = compat_ip_getsockopt,
1775 #endif
1776 };
1777 EXPORT_SYMBOL(ipv4_specific);
1778 
1779 #ifdef CONFIG_TCP_MD5SIG
1780 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1781 	.md5_lookup		= tcp_v4_md5_lookup,
1782 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1783 	.md5_parse		= tcp_v4_parse_md5_keys,
1784 };
1785 #endif
1786 
1787 /* NOTE: A lot of things are set to zero explicitly by the call to
1788  *       sk_alloc(), so they need not be done here.
1789  */
1790 static int tcp_v4_init_sock(struct sock *sk)
1791 {
1792 	struct inet_connection_sock *icsk = inet_csk(sk);
1793 
1794 	tcp_init_sock(sk);
1795 
1796 	icsk->icsk_af_ops = &ipv4_specific;
1797 
1798 #ifdef CONFIG_TCP_MD5SIG
1799 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1800 #endif
1801 
1802 	return 0;
1803 }
1804 
1805 void tcp_v4_destroy_sock(struct sock *sk)
1806 {
1807 	struct tcp_sock *tp = tcp_sk(sk);
1808 
1809 	tcp_clear_xmit_timers(sk);
1810 
1811 	tcp_cleanup_congestion_control(sk);
1812 
1813 	/* Clean up the write buffer. */
1814 	tcp_write_queue_purge(sk);
1815 
1816 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1817 	__skb_queue_purge(&tp->out_of_order_queue);
1818 
1819 #ifdef CONFIG_TCP_MD5SIG
1820 	/* Clean up the MD5 key list, if any */
1821 	if (tp->md5sig_info) {
1822 		tcp_clear_md5_list(sk);
1823 		kfree_rcu(tp->md5sig_info, rcu);
1824 		tp->md5sig_info = NULL;
1825 	}
1826 #endif
1827 
1828 #ifdef CONFIG_NET_DMA
1829 	/* Cleans up our sk_async_wait_queue */
1830 	__skb_queue_purge(&sk->sk_async_wait_queue);
1831 #endif
1832 
1833 	/* Clean the prequeue; really, it must be empty. */
1834 	__skb_queue_purge(&tp->ucopy.prequeue);
1835 
1836 	/* Clean up a referenced TCP bind bucket. */
1837 	if (inet_csk(sk)->icsk_bind_hash)
1838 		inet_put_port(sk);
1839 
1840 	BUG_ON(tp->fastopen_rsk != NULL);
1841 
1842 	/* If socket is aborted during connect operation */
1843 	tcp_free_fastopen_req(tp);
1844 
1845 	sk_sockets_allocated_dec(sk);
1846 	sock_release_memcg(sk);
1847 }
1848 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1849 
1850 #ifdef CONFIG_PROC_FS
1851 /* Proc filesystem TCP sock list dumping. */
1852 
1853 /*
1854  * Get the next listener socket following cur.  If cur is NULL, get the first
1855  * socket starting from the bucket given in st->bucket; when st->bucket is
1856  * zero, the very first socket in the hash table is returned.
1857  */
1858 static void *listening_get_next(struct seq_file *seq, void *cur)
1859 {
1860 	struct inet_connection_sock *icsk;
1861 	struct hlist_nulls_node *node;
1862 	struct sock *sk = cur;
1863 	struct inet_listen_hashbucket *ilb;
1864 	struct tcp_iter_state *st = seq->private;
1865 	struct net *net = seq_file_net(seq);
1866 
1867 	if (!sk) {
1868 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1869 		spin_lock_bh(&ilb->lock);
1870 		sk = sk_nulls_head(&ilb->head);
1871 		st->offset = 0;
1872 		goto get_sk;
1873 	}
1874 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1875 	++st->num;
1876 	++st->offset;
1877 
1878 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1879 		struct request_sock *req = cur;
1880 
1881 		icsk = inet_csk(st->syn_wait_sk);
1882 		req = req->dl_next;
1883 		while (1) {
1884 			while (req) {
1885 				if (req->rsk_ops->family == st->family) {
1886 					cur = req;
1887 					goto out;
1888 				}
1889 				req = req->dl_next;
1890 			}
1891 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1892 				break;
1893 get_req:
1894 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1895 		}
1896 		sk	  = sk_nulls_next(st->syn_wait_sk);
1897 		st->state = TCP_SEQ_STATE_LISTENING;
1898 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1899 	} else {
1900 		icsk = inet_csk(sk);
1901 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1902 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1903 			goto start_req;
1904 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1905 		sk = sk_nulls_next(sk);
1906 	}
1907 get_sk:
1908 	sk_nulls_for_each_from(sk, node) {
1909 		if (!net_eq(sock_net(sk), net))
1910 			continue;
1911 		if (sk->sk_family == st->family) {
1912 			cur = sk;
1913 			goto out;
1914 		}
1915 		icsk = inet_csk(sk);
1916 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1917 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1918 start_req:
1919 			st->uid		= sock_i_uid(sk);
1920 			st->syn_wait_sk = sk;
1921 			st->state	= TCP_SEQ_STATE_OPENREQ;
1922 			st->sbucket	= 0;
1923 			goto get_req;
1924 		}
1925 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1926 	}
1927 	spin_unlock_bh(&ilb->lock);
1928 	st->offset = 0;
1929 	if (++st->bucket < INET_LHTABLE_SIZE) {
1930 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1931 		spin_lock_bh(&ilb->lock);
1932 		sk = sk_nulls_head(&ilb->head);
1933 		goto get_sk;
1934 	}
1935 	cur = NULL;
1936 out:
1937 	return cur;
1938 }
1939 
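/*
 * Start from the first listening bucket and skip *pos entries, so a dump
 * can begin at an arbitrary position.
 */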
1940 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1941 {
1942 	struct tcp_iter_state *st = seq->private;
1943 	void *rc;
1944 
1945 	st->bucket = 0;
1946 	st->offset = 0;
1947 	rc = listening_get_next(seq, NULL);
1948 
1949 	while (rc && *pos) {
1950 		rc = listening_get_next(seq, rc);
1951 		--*pos;
1952 	}
1953 	return rc;
1954 }
1955 
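/* True if the ehash bucket selected by st->bucket currently holds no sockets. */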
1956 static inline bool empty_bucket(const struct tcp_iter_state *st)
1957 {
1958 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1959 }
1960 
1961 /*
1962  * Get the first established socket starting from the bucket given in
1963  * st->bucket.  If st->bucket is zero, the very first socket in the hash is returned.
1964  */
1965 static void *established_get_first(struct seq_file *seq)
1966 {
1967 	struct tcp_iter_state *st = seq->private;
1968 	struct net *net = seq_file_net(seq);
1969 	void *rc = NULL;
1970 
1971 	st->offset = 0;
1972 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1973 		struct sock *sk;
1974 		struct hlist_nulls_node *node;
1975 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1976 
1977 		/* Lockless fast path for the common case of empty buckets */
1978 		if (empty_bucket(st))
1979 			continue;
1980 
1981 		spin_lock_bh(lock);
1982 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1983 			if (sk->sk_family != st->family ||
1984 			    !net_eq(sock_net(sk), net)) {
1985 				continue;
1986 			}
1987 			rc = sk;
1988 			goto out;
1989 		}
1990 		spin_unlock_bh(lock);
1991 	}
1992 out:
1993 	return rc;
1994 }
1995 
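/*
 * Advance to the next socket on the current ehash chain; once the chain is
 * exhausted, drop its lock and continue with the first entry of the next
 * non-empty bucket via established_get_first().
 */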
1996 static void *established_get_next(struct seq_file *seq, void *cur)
1997 {
1998 	struct sock *sk = cur;
1999 	struct hlist_nulls_node *node;
2000 	struct tcp_iter_state *st = seq->private;
2001 	struct net *net = seq_file_net(seq);
2002 
2003 	++st->num;
2004 	++st->offset;
2005 
2006 	sk = sk_nulls_next(sk);
2007 
2008 	sk_nulls_for_each_from(sk, node) {
2009 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2010 			return sk;
2011 	}
2012 
2013 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2014 	++st->bucket;
2015 	return established_get_first(seq);
2016 }
2017 
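/* Like listening_get_idx(), but over the established (and TIME_WAIT) hash. */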
2018 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2019 {
2020 	struct tcp_iter_state *st = seq->private;
2021 	void *rc;
2022 
2023 	st->bucket = 0;
2024 	rc = established_get_first(seq);
2025 
2026 	while (rc && pos) {
2027 		rc = established_get_next(seq, rc);
2028 		--pos;
2029 	}
2030 	return rc;
2031 }
2032 
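/*
 * Map a flat /proc position onto the two-phase walk: listening sockets
 * first, then the established hash.
 */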
2033 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2034 {
2035 	void *rc;
2036 	struct tcp_iter_state *st = seq->private;
2037 
2038 	st->state = TCP_SEQ_STATE_LISTENING;
2039 	rc	  = listening_get_idx(seq, &pos);
2040 
2041 	if (!rc) {
2042 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2043 		rc	  = established_get_idx(seq, pos);
2044 	}
2045 
2046 	return rc;
2047 }
2048 
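/*
 * Fast restart for sequential reads: try to resume at the bucket/offset
 * remembered from the previous iteration instead of rescanning from the
 * beginning.  Returns NULL if the saved position is no longer reachable,
 * in which case tcp_seq_start() falls back to a full tcp_get_idx() walk.
 */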
2049 static void *tcp_seek_last_pos(struct seq_file *seq)
2050 {
2051 	struct tcp_iter_state *st = seq->private;
2052 	int offset = st->offset;
2053 	int orig_num = st->num;
2054 	void *rc = NULL;
2055 
2056 	switch (st->state) {
2057 	case TCP_SEQ_STATE_OPENREQ:
2058 	case TCP_SEQ_STATE_LISTENING:
2059 		if (st->bucket >= INET_LHTABLE_SIZE)
2060 			break;
2061 		st->state = TCP_SEQ_STATE_LISTENING;
2062 		rc = listening_get_next(seq, NULL);
2063 		while (offset-- && rc)
2064 			rc = listening_get_next(seq, rc);
2065 		if (rc)
2066 			break;
2067 		st->bucket = 0;
2068 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2069 		/* Fallthrough */
2070 	case TCP_SEQ_STATE_ESTABLISHED:
2071 		if (st->bucket > tcp_hashinfo.ehash_mask)
2072 			break;
2073 		rc = established_get_first(seq);
2074 		while (offset-- && rc)
2075 			rc = established_get_next(seq, rc);
2076 	}
2077 
2078 	st->num = orig_num;
2079 
2080 	return rc;
2081 }
2082 
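/*
 * seq_file ->start/->next/->stop callbacks; tcp_proc_register() installs
 * them into every afinfo's seq_ops.
 */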
2083 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2084 {
2085 	struct tcp_iter_state *st = seq->private;
2086 	void *rc;
2087 
2088 	if (*pos && *pos == st->last_pos) {
2089 		rc = tcp_seek_last_pos(seq);
2090 		if (rc)
2091 			goto out;
2092 	}
2093 
2094 	st->state = TCP_SEQ_STATE_LISTENING;
2095 	st->num = 0;
2096 	st->bucket = 0;
2097 	st->offset = 0;
2098 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2099 
2100 out:
2101 	st->last_pos = *pos;
2102 	return rc;
2103 }
2104 
2105 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2106 {
2107 	struct tcp_iter_state *st = seq->private;
2108 	void *rc = NULL;
2109 
2110 	if (v == SEQ_START_TOKEN) {
2111 		rc = tcp_get_idx(seq, 0);
2112 		goto out;
2113 	}
2114 
2115 	switch (st->state) {
2116 	case TCP_SEQ_STATE_OPENREQ:
2117 	case TCP_SEQ_STATE_LISTENING:
2118 		rc = listening_get_next(seq, v);
2119 		if (!rc) {
2120 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2121 			st->bucket = 0;
2122 			st->offset = 0;
2123 			rc	  = established_get_first(seq);
2124 		}
2125 		break;
2126 	case TCP_SEQ_STATE_ESTABLISHED:
2127 		rc = established_get_next(seq, v);
2128 		break;
2129 	}
2130 out:
2131 	++*pos;
2132 	st->last_pos = *pos;
2133 	return rc;
2134 }
2135 
2136 static void tcp_seq_stop(struct seq_file *seq, void *v)
2137 {
2138 	struct tcp_iter_state *st = seq->private;
2139 
2140 	switch (st->state) {
2141 	case TCP_SEQ_STATE_OPENREQ:
2142 		if (v) {
2143 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2144 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2145 		}
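		/* Fall through: the listening bucket lock is still held */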
2146 	case TCP_SEQ_STATE_LISTENING:
2147 		if (v != SEQ_START_TOKEN)
2148 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2149 		break;
2150 	case TCP_SEQ_STATE_ESTABLISHED:
2151 		if (v)
2152 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2153 		break;
2154 	}
2155 }
2156 
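/*
 * ->open for the /proc entries registered below: set up the net-aware
 * seq_file iterator and remember the address family from the afinfo that
 * was stored in the proc entry's data.
 */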
2157 int tcp_seq_open(struct inode *inode, struct file *file)
2158 {
2159 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2160 	struct tcp_iter_state *s;
2161 	int err;
2162 
2163 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2164 			  sizeof(struct tcp_iter_state));
2165 	if (err < 0)
2166 		return err;
2167 
2168 	s = ((struct seq_file *)file->private_data)->private;
2169 	s->family		= afinfo->family;
2170 	s->last_pos 		= 0;
2171 	return 0;
2172 }
2173 EXPORT_SYMBOL(tcp_seq_open);
2174 
2175 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2176 {
2177 	int rc = 0;
2178 	struct proc_dir_entry *p;
2179 
2180 	afinfo->seq_ops.start		= tcp_seq_start;
2181 	afinfo->seq_ops.next		= tcp_seq_next;
2182 	afinfo->seq_ops.stop		= tcp_seq_stop;
2183 
2184 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2185 			     afinfo->seq_fops, afinfo);
2186 	if (!p)
2187 		rc = -ENOMEM;
2188 	return rc;
2189 }
2190 EXPORT_SYMBOL(tcp_proc_register);
2191 
2192 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2193 {
2194 	remove_proc_entry(afinfo->name, net->proc_net);
2195 }
2196 EXPORT_SYMBOL(tcp_proc_unregister);
2197 
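/*
 * Formatting helpers for a single /proc/net/tcp record: one each for
 * embryonic (SYN_RECV) request sockets, full sockets and TIME_WAIT
 * sockets.  The columns line up with the header emitted by tcp4_seq_show().
 */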
2198 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2199 			 struct seq_file *f, int i, kuid_t uid)
2200 {
2201 	const struct inet_request_sock *ireq = inet_rsk(req);
2202 	long delta = req->expires - jiffies;
2203 
2204 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2205 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2206 		i,
2207 		ireq->ir_loc_addr,
2208 		ntohs(inet_sk(sk)->inet_sport),
2209 		ireq->ir_rmt_addr,
2210 		ntohs(ireq->ir_rmt_port),
2211 		TCP_SYN_RECV,
2212 		0, 0, /* could print option size, but that is af dependent. */
2213 		1,    /* timers active (only the expire timer) */
2214 		jiffies_delta_to_clock_t(delta),
2215 		req->num_timeout,
2216 		from_kuid_munged(seq_user_ns(f), uid),
2217 		0,  /* non standard timer */
2218 		0, /* open_requests have no inode */
2219 		atomic_read(&sk->sk_refcnt),
2220 		req);
2221 }
2222 
2223 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2224 {
2225 	int timer_active;
2226 	unsigned long timer_expires;
2227 	const struct tcp_sock *tp = tcp_sk(sk);
2228 	const struct inet_connection_sock *icsk = inet_csk(sk);
2229 	const struct inet_sock *inet = inet_sk(sk);
2230 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2231 	__be32 dest = inet->inet_daddr;
2232 	__be32 src = inet->inet_rcv_saddr;
2233 	__u16 destp = ntohs(inet->inet_dport);
2234 	__u16 srcp = ntohs(inet->inet_sport);
2235 	int rx_queue;
2236 
2237 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2238 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2239 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2240 		timer_active	= 1;
2241 		timer_expires	= icsk->icsk_timeout;
2242 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2243 		timer_active	= 4;
2244 		timer_expires	= icsk->icsk_timeout;
2245 	} else if (timer_pending(&sk->sk_timer)) {
2246 		timer_active	= 2;
2247 		timer_expires	= sk->sk_timer.expires;
2248 	} else {
2249 		timer_active	= 0;
2250 		timer_expires = jiffies;
2251 	}
2252 
2253 	if (sk->sk_state == TCP_LISTEN)
2254 		rx_queue = sk->sk_ack_backlog;
2255 	else
2256 		/*
2257 		 * Because we don't lock the socket, we might find a transient negative value.
2258 		 */
2259 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2260 
2261 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2262 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2263 		i, src, srcp, dest, destp, sk->sk_state,
2264 		tp->write_seq - tp->snd_una,
2265 		rx_queue,
2266 		timer_active,
2267 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2268 		icsk->icsk_retransmits,
2269 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2270 		icsk->icsk_probes_out,
2271 		sock_i_ino(sk),
2272 		atomic_read(&sk->sk_refcnt), sk,
2273 		jiffies_to_clock_t(icsk->icsk_rto),
2274 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2275 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2276 		tp->snd_cwnd,
2277 		sk->sk_state == TCP_LISTEN ?
2278 		    (fastopenq ? fastopenq->max_qlen : 0) :
2279 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2280 }
2281 
2282 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2283 			       struct seq_file *f, int i)
2284 {
2285 	__be32 dest, src;
2286 	__u16 destp, srcp;
2287 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
2288 
2289 	dest  = tw->tw_daddr;
2290 	src   = tw->tw_rcv_saddr;
2291 	destp = ntohs(tw->tw_dport);
2292 	srcp  = ntohs(tw->tw_sport);
2293 
2294 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2295 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2296 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2297 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2298 		atomic_read(&tw->tw_refcnt), tw);
2299 }
2300 
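/* Every record is padded to TMPSZ - 1 characters plus the trailing newline. */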
2301 #define TMPSZ 150
2302 
2303 static int tcp4_seq_show(struct seq_file *seq, void *v)
2304 {
2305 	struct tcp_iter_state *st;
2306 	struct sock *sk = v;
2307 
2308 	seq_setwidth(seq, TMPSZ - 1);
2309 	if (v == SEQ_START_TOKEN) {
2310 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2311 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2312 			   "inode");
2313 		goto out;
2314 	}
2315 	st = seq->private;
2316 
2317 	switch (st->state) {
2318 	case TCP_SEQ_STATE_LISTENING:
2319 	case TCP_SEQ_STATE_ESTABLISHED:
2320 		if (sk->sk_state == TCP_TIME_WAIT)
2321 			get_timewait4_sock(v, seq, st->num);
2322 		else
2323 			get_tcp4_sock(v, seq, st->num);
2324 		break;
2325 	case TCP_SEQ_STATE_OPENREQ:
2326 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
2327 		break;
2328 	}
2329 out:
2330 	seq_pad(seq, '\n');
2331 	return 0;
2332 }
2333 
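/*
 * file_operations shared by all TCP /proc entries; tcp_seq_open() retrieves
 * the per-family afinfo from the proc entry's data pointer.
 */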
2334 static const struct file_operations tcp_afinfo_seq_fops = {
2335 	.owner   = THIS_MODULE,
2336 	.open    = tcp_seq_open,
2337 	.read    = seq_read,
2338 	.llseek  = seq_lseek,
2339 	.release = seq_release_net
2340 };
2341 
2342 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2343 	.name		= "tcp",
2344 	.family		= AF_INET,
2345 	.seq_fops	= &tcp_afinfo_seq_fops,
2346 	.seq_ops	= {
2347 		.show		= tcp4_seq_show,
2348 	},
2349 };
2350 
2351 static int __net_init tcp4_proc_init_net(struct net *net)
2352 {
2353 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2354 }
2355 
2356 static void __net_exit tcp4_proc_exit_net(struct net *net)
2357 {
2358 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2359 }
2360 
2361 static struct pernet_operations tcp4_net_ops = {
2362 	.init = tcp4_proc_init_net,
2363 	.exit = tcp4_proc_exit_net,
2364 };
2365 
2366 int __init tcp4_proc_init(void)
2367 {
2368 	return register_pernet_subsys(&tcp4_net_ops);
2369 }
2370 
2371 void tcp4_proc_exit(void)
2372 {
2373 	unregister_pernet_subsys(&tcp4_net_ops);
2374 }
2375 #endif /* CONFIG_PROC_FS */
2376 
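/*
 * The IPv4 TCP protocol descriptor: the glue between the generic socket
 * layer and the implementation above.
 */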
2377 struct proto tcp_prot = {
2378 	.name			= "TCP",
2379 	.owner			= THIS_MODULE,
2380 	.close			= tcp_close,
2381 	.connect		= tcp_v4_connect,
2382 	.disconnect		= tcp_disconnect,
2383 	.accept			= inet_csk_accept,
2384 	.ioctl			= tcp_ioctl,
2385 	.init			= tcp_v4_init_sock,
2386 	.destroy		= tcp_v4_destroy_sock,
2387 	.shutdown		= tcp_shutdown,
2388 	.setsockopt		= tcp_setsockopt,
2389 	.getsockopt		= tcp_getsockopt,
2390 	.recvmsg		= tcp_recvmsg,
2391 	.sendmsg		= tcp_sendmsg,
2392 	.sendpage		= tcp_sendpage,
2393 	.backlog_rcv		= tcp_v4_do_rcv,
2394 	.release_cb		= tcp_release_cb,
2395 	.mtu_reduced		= tcp_v4_mtu_reduced,
2396 	.hash			= inet_hash,
2397 	.unhash			= inet_unhash,
2398 	.get_port		= inet_csk_get_port,
2399 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2400 	.stream_memory_free	= tcp_stream_memory_free,
2401 	.sockets_allocated	= &tcp_sockets_allocated,
2402 	.orphan_count		= &tcp_orphan_count,
2403 	.memory_allocated	= &tcp_memory_allocated,
2404 	.memory_pressure	= &tcp_memory_pressure,
2405 	.sysctl_mem		= sysctl_tcp_mem,
2406 	.sysctl_wmem		= sysctl_tcp_wmem,
2407 	.sysctl_rmem		= sysctl_tcp_rmem,
2408 	.max_header		= MAX_TCP_HEADER,
2409 	.obj_size		= sizeof(struct tcp_sock),
2410 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2411 	.twsk_prot		= &tcp_timewait_sock_ops,
2412 	.rsk_prot		= &tcp_request_sock_ops,
2413 	.h.hashinfo		= &tcp_hashinfo,
2414 	.no_autobind		= true,
2415 #ifdef CONFIG_COMPAT
2416 	.compat_setsockopt	= compat_tcp_setsockopt,
2417 	.compat_getsockopt	= compat_tcp_getsockopt,
2418 #endif
2419 #ifdef CONFIG_MEMCG_KMEM
2420 	.init_cgroup		= tcp_init_cgroup,
2421 	.destroy_cgroup		= tcp_destroy_cgroup,
2422 	.proto_cgroup		= tcp_proto_cgroup,
2423 #endif
2424 };
2425 EXPORT_SYMBOL(tcp_prot);
2426 
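/*
 * Per-network-namespace setup and teardown.  Only the sysctl_tcp_ecn
 * default is initialised here (2: accept ECN when requested by the peer
 * without asking for it on outgoing connections); the batched exit flushes
 * any remaining TIME_WAIT sockets belonging to the dying namespaces.
 */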
2427 static int __net_init tcp_sk_init(struct net *net)
2428 {
2429 	net->ipv4.sysctl_tcp_ecn = 2;
2430 	return 0;
2431 }
2432 
2433 static void __net_exit tcp_sk_exit(struct net *net)
2434 {
2435 }
2436 
2437 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2438 {
2439 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2440 }
2441 
2442 static struct pernet_operations __net_initdata tcp_sk_ops = {
2443 	.init	   = tcp_sk_init,
2444 	.exit	   = tcp_sk_exit,
2445 	.exit_batch = tcp_sk_exit_batch,
2446 };
2447 
2448 void __init tcp_v4_init(void)
2449 {
2450 	inet_hashinfo_init(&tcp_hashinfo);
2451 	if (register_pernet_subsys(&tcp_sk_ops))
2452 		panic("Failed to create the TCP control socket.\n");
2453 }
2454