xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision 3932b9ca)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78 #include <net/busy_poll.h>
79 
80 #include <linux/inet.h>
81 #include <linux/ipv6.h>
82 #include <linux/stddef.h>
83 #include <linux/proc_fs.h>
84 #include <linux/seq_file.h>
85 
86 #include <linux/crypto.h>
87 #include <linux/scatterlist.h>
88 
89 int sysctl_tcp_tw_reuse __read_mostly;
90 int sysctl_tcp_low_latency __read_mostly;
91 EXPORT_SYMBOL(sysctl_tcp_low_latency);
92 
93 
94 #ifdef CONFIG_TCP_MD5SIG
95 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
96 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
97 #endif
98 
99 struct inet_hashinfo tcp_hashinfo;
100 EXPORT_SYMBOL(tcp_hashinfo);
101 
102 static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
103 {
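	/* The initial sequence number is derived from the connection 4-tuple
	 * plus a per-boot secret and a clock component (roughly the RFC 6528
	 * scheme), so peers cannot easily predict it. */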
104 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
105 					  ip_hdr(skb)->saddr,
106 					  tcp_hdr(skb)->dest,
107 					  tcp_hdr(skb)->source);
108 }
109 
110 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
111 {
112 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
113 	struct tcp_sock *tp = tcp_sk(sk);
114 
115 	/* With PAWS, it is safe from the viewpoint
116 	   of data integrity. Even without PAWS it is safe provided sequence
117 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
118 
119 	   Actually, the idea is close to VJ's, only the timestamp cache is
120 	   held not per host but per port pair, and the TW bucket is used as
121 	   the state holder.
122 
123 	   If the TW bucket has already been destroyed we fall back to VJ's scheme
124 	   and use the initial timestamp retrieved from the peer table.
125 	 */
126 	if (tcptw->tw_ts_recent_stamp &&
127 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
128 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
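		/* Reuse allowed: start the new connection's sequence space well
		 * past the old one (a full 64K window plus 2) so stray segments
		 * from the previous incarnation cannot be taken for new data. */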
129 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
130 		if (tp->write_seq == 0)
131 			tp->write_seq = 1;
132 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
133 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
134 		sock_hold(sktw);
135 		return 1;
136 	}
137 
138 	return 0;
139 }
140 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
141 
142 /* This will initiate an outgoing connection. */
143 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
144 {
145 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
146 	struct inet_sock *inet = inet_sk(sk);
147 	struct tcp_sock *tp = tcp_sk(sk);
148 	__be16 orig_sport, orig_dport;
149 	__be32 daddr, nexthop;
150 	struct flowi4 *fl4;
151 	struct rtable *rt;
152 	int err;
153 	struct ip_options_rcu *inet_opt;
154 
155 	if (addr_len < sizeof(struct sockaddr_in))
156 		return -EINVAL;
157 
158 	if (usin->sin_family != AF_INET)
159 		return -EAFNOSUPPORT;
160 
161 	nexthop = daddr = usin->sin_addr.s_addr;
162 	inet_opt = rcu_dereference_protected(inet->inet_opt,
163 					     sock_owned_by_user(sk));
164 	if (inet_opt && inet_opt->opt.srr) {
165 		if (!daddr)
166 			return -EINVAL;
167 		nexthop = inet_opt->opt.faddr;
168 	}
169 
170 	orig_sport = inet->inet_sport;
171 	orig_dport = usin->sin_port;
172 	fl4 = &inet->cork.fl.u.ip4;
173 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
174 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 			      IPPROTO_TCP,
176 			      orig_sport, orig_dport, sk);
177 	if (IS_ERR(rt)) {
178 		err = PTR_ERR(rt);
179 		if (err == -ENETUNREACH)
180 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
181 		return err;
182 	}
183 
184 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185 		ip_rt_put(rt);
186 		return -ENETUNREACH;
187 	}
188 
189 	if (!inet_opt || !inet_opt->opt.srr)
190 		daddr = fl4->daddr;
191 
192 	if (!inet->inet_saddr)
193 		inet->inet_saddr = fl4->saddr;
194 	inet->inet_rcv_saddr = inet->inet_saddr;
195 
196 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
197 		/* Reset inherited state */
198 		tp->rx_opt.ts_recent	   = 0;
199 		tp->rx_opt.ts_recent_stamp = 0;
200 		if (likely(!tp->repair))
201 			tp->write_seq	   = 0;
202 	}
203 
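	/* With tw_recycle enabled and no timestamp state yet, pull the peer's
	 * last seen timestamp from the metrics cache so PAWS can validate
	 * segments after a quick TIME-WAIT recycle. */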
204 	if (tcp_death_row.sysctl_tw_recycle &&
205 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
206 		tcp_fetch_timewait_stamp(sk, &rt->dst);
207 
208 	inet->inet_dport = usin->sin_port;
209 	inet->inet_daddr = daddr;
210 
211 	inet_set_txhash(sk);
212 
213 	inet_csk(sk)->icsk_ext_hdr_len = 0;
214 	if (inet_opt)
215 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
216 
217 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
218 
219 	/* Socket identity is still unknown (sport may be zero).
220 	 * However we set state to SYN-SENT and, without releasing the socket
221 	 * lock, select a source port, enter ourselves into the hash tables and
222 	 * complete initialization after this.
223 	 */
224 	tcp_set_state(sk, TCP_SYN_SENT);
225 	err = inet_hash_connect(&tcp_death_row, sk);
226 	if (err)
227 		goto failure;
228 
229 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
230 			       inet->inet_sport, inet->inet_dport, sk);
231 	if (IS_ERR(rt)) {
232 		err = PTR_ERR(rt);
233 		rt = NULL;
234 		goto failure;
235 	}
236 	/* OK, now commit destination to socket.  */
237 	sk->sk_gso_type = SKB_GSO_TCPV4;
238 	sk_setup_caps(sk, &rt->dst);
239 
240 	if (!tp->write_seq && likely(!tp->repair))
241 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
242 							   inet->inet_daddr,
243 							   inet->inet_sport,
244 							   usin->sin_port);
245 
246 	inet->inet_id = tp->write_seq ^ jiffies;
247 
248 	err = tcp_connect(sk);
249 
250 	rt = NULL;
251 	if (err)
252 		goto failure;
253 
254 	return 0;
255 
256 failure:
257 	/*
258 	 * This unhashes the socket and releases the local port,
259 	 * if necessary.
260 	 */
261 	tcp_set_state(sk, TCP_CLOSE);
262 	ip_rt_put(rt);
263 	sk->sk_route_caps = 0;
264 	inet->inet_dport = 0;
265 	return err;
266 }
267 EXPORT_SYMBOL(tcp_v4_connect);
268 
269 /*
270  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
271  * It can be called through tcp_release_cb() if socket was owned by user
272  * at the time tcp_v4_err() was called to handle ICMP message.
273  */
274 void tcp_v4_mtu_reduced(struct sock *sk)
275 {
276 	struct dst_entry *dst;
277 	struct inet_sock *inet = inet_sk(sk);
278 	u32 mtu = tcp_sk(sk)->mtu_info;
279 
280 	dst = inet_csk_update_pmtu(sk, mtu);
281 	if (!dst)
282 		return;
283 
284 	/* Something is about to go wrong... Remember the soft error
285 	 * in case this connection is not able to recover.
286 	 */
287 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
288 		sk->sk_err_soft = EMSGSIZE;
289 
290 	mtu = dst_mtu(dst);
291 
292 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
293 	    ip_sk_accept_pmtu(sk) &&
294 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
295 		tcp_sync_mss(sk, mtu);
296 
297 		/* Resend the TCP packet because it's
298 		 * clear that the old packet has been
299 		 * dropped. This is the new "fast" path mtu
300 		 * discovery.
301 		 */
302 		tcp_simple_retransmit(sk);
303 	} /* else let the usual retransmit timer handle it */
304 }
305 EXPORT_SYMBOL(tcp_v4_mtu_reduced);
306 
307 static void do_redirect(struct sk_buff *skb, struct sock *sk)
308 {
309 	struct dst_entry *dst = __sk_dst_check(sk, 0);
310 
311 	if (dst)
312 		dst->ops->redirect(dst, sk, skb);
313 }
314 
315 /*
316  * This routine is called by the ICMP module when it gets some
317  * sort of error condition.  If err < 0 then the socket should
318  * be closed and the error returned to the user.  If err > 0
319  * it's just the icmp type << 8 | icmp code.  After adjustment
320  * header points to the first 8 bytes of the tcp header.  We need
321  * to find the appropriate port.
322  *
323  * The locking strategy used here is very "optimistic". When
324  * someone else accesses the socket the ICMP is just dropped
325  * and for some paths there is no check at all.
326  * A more general error queue to queue errors for later handling
327  * is probably better.
328  *
329  */
330 
331 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
332 {
333 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
334 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
335 	struct inet_connection_sock *icsk;
336 	struct tcp_sock *tp;
337 	struct inet_sock *inet;
338 	const int type = icmp_hdr(icmp_skb)->type;
339 	const int code = icmp_hdr(icmp_skb)->code;
340 	struct sock *sk;
341 	struct sk_buff *skb;
342 	struct request_sock *fastopen;
343 	__u32 seq, snd_una;
344 	__u32 remaining;
345 	int err;
346 	struct net *net = dev_net(icmp_skb->dev);
347 
348 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
349 			iph->saddr, th->source, inet_iif(icmp_skb));
350 	if (!sk) {
351 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
352 		return;
353 	}
354 	if (sk->sk_state == TCP_TIME_WAIT) {
355 		inet_twsk_put(inet_twsk(sk));
356 		return;
357 	}
358 
359 	bh_lock_sock(sk);
360 	/* If too many ICMPs get dropped on busy
361 	 * servers this needs to be solved differently.
362 	 * We do take care of the PMTU discovery (RFC1191) special case:
363 	 * we can receive locally generated ICMP messages while the socket is held.
364 	 */
365 	if (sock_owned_by_user(sk)) {
366 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
367 			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
368 	}
369 	if (sk->sk_state == TCP_CLOSE)
370 		goto out;
371 
372 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
373 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
374 		goto out;
375 	}
376 
377 	icsk = inet_csk(sk);
378 	tp = tcp_sk(sk);
379 	seq = ntohl(th->seq);
380 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
381 	fastopen = tp->fastopen_rsk;
382 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
383 	if (sk->sk_state != TCP_LISTEN &&
384 	    !between(seq, snd_una, tp->snd_nxt)) {
385 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
386 		goto out;
387 	}
388 
389 	switch (type) {
390 	case ICMP_REDIRECT:
391 		do_redirect(icmp_skb, sk);
392 		goto out;
393 	case ICMP_SOURCE_QUENCH:
394 		/* Just silently ignore these. */
395 		goto out;
396 	case ICMP_PARAMETERPROB:
397 		err = EPROTO;
398 		break;
399 	case ICMP_DEST_UNREACH:
400 		if (code > NR_ICMP_UNREACH)
401 			goto out;
402 
403 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
404 			/* We are not interested in TCP_LISTEN and open_requests
405 			 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
406 			 * they should go through unfragmented).
407 			 */
408 			if (sk->sk_state == TCP_LISTEN)
409 				goto out;
410 
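			/* Record the new MTU; if the socket is currently owned by
			 * the user, the update is deferred to tcp_release_cb() via
			 * the TCP_MTU_REDUCED_DEFERRED flag set below. */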
411 			tp->mtu_info = info;
412 			if (!sock_owned_by_user(sk)) {
413 				tcp_v4_mtu_reduced(sk);
414 			} else {
415 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
416 					sock_hold(sk);
417 			}
418 			goto out;
419 		}
420 
421 		err = icmp_err_convert[code].errno;
422 		/* check if icmp_skb allows revert of backoff
423 		 * (see draft-zimmermann-tcp-lcd) */
424 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
425 			break;
426 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
427 		    !icsk->icsk_backoff || fastopen)
428 			break;
429 
430 		if (sock_owned_by_user(sk))
431 			break;
432 
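		/* Undo one exponential-backoff step and re-arm the retransmit
		 * timer with whatever time remains, as suggested by
		 * draft-zimmermann-tcp-lcd. */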
433 		icsk->icsk_backoff--;
434 		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
435 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
436 		tcp_bound_rto(sk);
437 
438 		skb = tcp_write_queue_head(sk);
439 		BUG_ON(!skb);
440 
441 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
442 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
443 
444 		if (remaining) {
445 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
446 						  remaining, TCP_RTO_MAX);
447 		} else {
448 			/* The RTO revert clocked out the retransmission.
449 			 * Retransmit now. */
450 			tcp_retransmit_timer(sk);
451 		}
452 
453 		break;
454 	case ICMP_TIME_EXCEEDED:
455 		err = EHOSTUNREACH;
456 		break;
457 	default:
458 		goto out;
459 	}
460 
461 	switch (sk->sk_state) {
462 		struct request_sock *req, **prev;
463 	case TCP_LISTEN:
464 		if (sock_owned_by_user(sk))
465 			goto out;
466 
467 		req = inet_csk_search_req(sk, &prev, th->dest,
468 					  iph->daddr, iph->saddr);
469 		if (!req)
470 			goto out;
471 
472 		/* ICMPs are not backlogged, hence we cannot get
473 		 * an established socket here.
474 		 */
475 		WARN_ON(req->sk);
476 
477 		if (seq != tcp_rsk(req)->snt_isn) {
478 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
479 			goto out;
480 		}
481 
482 		/*
483 		 * Still in SYN_RECV, just remove it silently.
484 		 * There is no good way to pass the error to the newly
485 		 * created socket, and POSIX does not want network
486 		 * errors returned from accept().
487 		 */
488 		inet_csk_reqsk_queue_drop(sk, req, prev);
489 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
490 		goto out;
491 
492 	case TCP_SYN_SENT:
493 	case TCP_SYN_RECV:
494 		/* Only in fast or simultaneous open. If a fast open socket is
495 		 * already accepted it is treated as a connected one below.
496 		 */
497 		if (fastopen && fastopen->sk == NULL)
498 			break;
499 
500 		if (!sock_owned_by_user(sk)) {
501 			sk->sk_err = err;
502 
503 			sk->sk_error_report(sk);
504 
505 			tcp_done(sk);
506 		} else {
507 			sk->sk_err_soft = err;
508 		}
509 		goto out;
510 	}
511 
512 	/* If we've already connected we will keep trying
513 	 * until we time out, or the user gives up.
514 	 *
515 	 * rfc1122 4.2.3.9 allows us to consider as hard errors
516 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
517 	 * but it is obsoleted by pmtu discovery).
518 	 *
519 	 * Note that in the modern internet, where routing is unreliable
520 	 * and broken firewalls sit in each dark corner, sending random
521 	 * errors ordered by their masters, even these two messages finally lose
522 	 * their original sense (even Linux sends invalid PORT_UNREACHs).
523 	 *
524 	 * Now we are in compliance with RFCs.
525 	 *							--ANK (980905)
526 	 */
527 
528 	inet = inet_sk(sk);
529 	if (!sock_owned_by_user(sk) && inet->recverr) {
530 		sk->sk_err = err;
531 		sk->sk_error_report(sk);
532 	} else	{ /* Only an error on timeout */
533 		sk->sk_err_soft = err;
534 	}
535 
536 out:
537 	bh_unlock_sock(sk);
538 	sock_put(sk);
539 }
540 
541 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
542 {
543 	struct tcphdr *th = tcp_hdr(skb);
544 
545 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
546 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
547 		skb->csum_start = skb_transport_header(skb) - skb->head;
548 		skb->csum_offset = offsetof(struct tcphdr, check);
549 	} else {
550 		th->check = tcp_v4_check(skb->len, saddr, daddr,
551 					 csum_partial(th,
552 						      th->doff << 2,
553 						      skb->csum));
554 	}
555 }
556 
557 /* This routine computes an IPv4 TCP checksum. */
558 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
559 {
560 	const struct inet_sock *inet = inet_sk(sk);
561 
562 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
563 }
564 EXPORT_SYMBOL(tcp_v4_send_check);
565 
566 /*
567  *	This routine will send an RST to the other tcp.
568  *
569  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
570  *		      for the reset?
571  *	Answer: if a packet caused the RST, it is not for a socket
572  *		existing in our system; if it is matched to a socket,
573  *		it is just a duplicate segment or a bug in the other side's TCP.
574  *		So we build the reply based only on parameters
575  *		that arrived with the segment.
576  *	Exception: precedence violation. We do not implement it in any case.
577  */
578 
579 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
580 {
581 	const struct tcphdr *th = tcp_hdr(skb);
582 	struct {
583 		struct tcphdr th;
584 #ifdef CONFIG_TCP_MD5SIG
585 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
586 #endif
587 	} rep;
588 	struct ip_reply_arg arg;
589 #ifdef CONFIG_TCP_MD5SIG
590 	struct tcp_md5sig_key *key;
591 	const __u8 *hash_location = NULL;
592 	unsigned char newhash[16];
593 	int genhash;
594 	struct sock *sk1 = NULL;
595 #endif
596 	struct net *net;
597 
598 	/* Never send a reset in response to a reset. */
599 	if (th->rst)
600 		return;
601 
602 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
603 		return;
604 
605 	/* Swap the send and the receive. */
606 	memset(&rep, 0, sizeof(rep));
607 	rep.th.dest   = th->source;
608 	rep.th.source = th->dest;
609 	rep.th.doff   = sizeof(struct tcphdr) / 4;
610 	rep.th.rst    = 1;
611 
612 	if (th->ack) {
613 		rep.th.seq = th->ack_seq;
614 	} else {
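		/* The offending segment carried no ACK: acknowledge everything
		 * it occupied (SYN and FIN each consume one sequence number). */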
615 		rep.th.ack = 1;
616 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
617 				       skb->len - (th->doff << 2));
618 	}
619 
620 	memset(&arg, 0, sizeof(arg));
621 	arg.iov[0].iov_base = (unsigned char *)&rep;
622 	arg.iov[0].iov_len  = sizeof(rep.th);
623 
624 #ifdef CONFIG_TCP_MD5SIG
625 	hash_location = tcp_parse_md5sig_option(th);
626 	if (!sk && hash_location) {
627 		/*
628 		 * The active side is lost. Try to find the listening socket through
629 		 * the source port, and then find the md5 key through the listening socket.
630 		 * We do not lose security here:
631 		 * the incoming packet is checked against the md5 hash of the found key,
632 		 * and no RST is generated if the md5 hash doesn't match.
633 		 */
634 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
635 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
636 					     th->source, ip_hdr(skb)->daddr,
637 					     ntohs(th->source), inet_iif(skb));
638 		/* Don't send an RST if we can't find a key. */
639 		if (!sk1)
640 			return;
641 		rcu_read_lock();
642 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
643 					&ip_hdr(skb)->saddr, AF_INET);
644 		if (!key)
645 			goto release_sk1;
646 
647 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
648 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
649 			goto release_sk1;
650 	} else {
651 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
652 					     &ip_hdr(skb)->saddr,
653 					     AF_INET) : NULL;
654 	}
655 
656 	if (key) {
657 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
658 				   (TCPOPT_NOP << 16) |
659 				   (TCPOPT_MD5SIG << 8) |
660 				   TCPOLEN_MD5SIG);
661 		/* Update length and the length the header thinks exists */
662 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
663 		rep.th.doff = arg.iov[0].iov_len / 4;
664 
665 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
666 				     key, ip_hdr(skb)->saddr,
667 				     ip_hdr(skb)->daddr, &rep.th);
668 	}
669 #endif
670 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
671 				      ip_hdr(skb)->saddr, /* XXX */
672 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
673 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
674 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
675 	/* When the socket is gone, all binding information is lost.
676 	 * Routing might fail in this case. No choice here: if we choose to force
677 	 * the input interface, we will misroute in case of an asymmetric route.
678 	 */
679 	if (sk)
680 		arg.bound_dev_if = sk->sk_bound_dev_if;
681 
682 	net = dev_net(skb_dst(skb)->dev);
683 	arg.tos = ip_hdr(skb)->tos;
684 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
685 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
686 
687 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
688 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
689 
690 #ifdef CONFIG_TCP_MD5SIG
691 release_sk1:
692 	if (sk1) {
693 		rcu_read_unlock();
694 		sock_put(sk1);
695 	}
696 #endif
697 }
698 
699 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
700    outside socket context, is certainly ugly. What can I do?
701  */
702 
703 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
704 			    u32 win, u32 tsval, u32 tsecr, int oif,
705 			    struct tcp_md5sig_key *key,
706 			    int reply_flags, u8 tos)
707 {
708 	const struct tcphdr *th = tcp_hdr(skb);
709 	struct {
710 		struct tcphdr th;
711 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
712 #ifdef CONFIG_TCP_MD5SIG
713 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
714 #endif
715 			];
716 	} rep;
717 	struct ip_reply_arg arg;
718 	struct net *net = dev_net(skb_dst(skb)->dev);
719 
720 	memset(&rep.th, 0, sizeof(struct tcphdr));
721 	memset(&arg, 0, sizeof(arg));
722 
723 	arg.iov[0].iov_base = (unsigned char *)&rep;
724 	arg.iov[0].iov_len  = sizeof(rep.th);
725 	if (tsecr) {
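		/* Encode the timestamp option preceded by two NOPs so the
		 * option block stays 4-byte aligned. */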
726 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
727 				   (TCPOPT_TIMESTAMP << 8) |
728 				   TCPOLEN_TIMESTAMP);
729 		rep.opt[1] = htonl(tsval);
730 		rep.opt[2] = htonl(tsecr);
731 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
732 	}
733 
734 	/* Swap the send and the receive. */
735 	rep.th.dest    = th->source;
736 	rep.th.source  = th->dest;
737 	rep.th.doff    = arg.iov[0].iov_len / 4;
738 	rep.th.seq     = htonl(seq);
739 	rep.th.ack_seq = htonl(ack);
740 	rep.th.ack     = 1;
741 	rep.th.window  = htons(win);
742 
743 #ifdef CONFIG_TCP_MD5SIG
744 	if (key) {
745 		int offset = (tsecr) ? 3 : 0;
746 
747 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
748 					  (TCPOPT_NOP << 16) |
749 					  (TCPOPT_MD5SIG << 8) |
750 					  TCPOLEN_MD5SIG);
751 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
752 		rep.th.doff = arg.iov[0].iov_len/4;
753 
754 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
755 				    key, ip_hdr(skb)->saddr,
756 				    ip_hdr(skb)->daddr, &rep.th);
757 	}
758 #endif
759 	arg.flags = reply_flags;
760 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
761 				      ip_hdr(skb)->saddr, /* XXX */
762 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
763 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
764 	if (oif)
765 		arg.bound_dev_if = oif;
766 	arg.tos = tos;
767 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
768 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
769 
770 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
771 }
772 
773 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
774 {
775 	struct inet_timewait_sock *tw = inet_twsk(sk);
776 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
777 
778 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
779 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
780 			tcp_time_stamp + tcptw->tw_ts_offset,
781 			tcptw->tw_ts_recent,
782 			tw->tw_bound_dev_if,
783 			tcp_twsk_md5_key(tcptw),
784 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
785 			tw->tw_tos
786 			);
787 
788 	inet_twsk_put(tw);
789 }
790 
791 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
792 				  struct request_sock *req)
793 {
794 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
795 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
796 	 */
797 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
798 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
799 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
800 			tcp_time_stamp,
801 			req->ts_recent,
802 			0,
803 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
804 					  AF_INET),
805 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
806 			ip_hdr(skb)->tos);
807 }
808 
809 /*
810  *	Send a SYN-ACK after having received a SYN.
811  *	This still operates on a request_sock only, not on a big
812  *	socket.
813  */
814 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
815 			      struct flowi *fl,
816 			      struct request_sock *req,
817 			      u16 queue_mapping,
818 			      struct tcp_fastopen_cookie *foc)
819 {
820 	const struct inet_request_sock *ireq = inet_rsk(req);
821 	struct flowi4 fl4;
822 	int err = -1;
823 	struct sk_buff *skb;
824 
825 	/* First, grab a route. */
826 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
827 		return -1;
828 
829 	skb = tcp_make_synack(sk, dst, req, foc);
830 
831 	if (skb) {
832 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
833 
834 		skb_set_queue_mapping(skb, queue_mapping);
835 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
836 					    ireq->ir_rmt_addr,
837 					    ireq->opt);
838 		err = net_xmit_eval(err);
839 	}
840 
841 	return err;
842 }
843 
844 /*
845  *	IPv4 request_sock destructor.
846  */
847 static void tcp_v4_reqsk_destructor(struct request_sock *req)
848 {
849 	kfree(inet_rsk(req)->opt);
850 }
851 
852 /*
853  * Return true if a syncookie should be sent
854  */
855 bool tcp_syn_flood_action(struct sock *sk,
856 			 const struct sk_buff *skb,
857 			 const char *proto)
858 {
859 	const char *msg = "Dropping request";
860 	bool want_cookie = false;
861 	struct listen_sock *lopt;
862 
863 #ifdef CONFIG_SYN_COOKIES
864 	if (sysctl_tcp_syncookies) {
865 		msg = "Sending cookies";
866 		want_cookie = true;
867 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
868 	} else
869 #endif
870 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
871 
872 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
873 	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
874 		lopt->synflood_warned = 1;
875 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
876 			proto, ntohs(tcp_hdr(skb)->dest), msg);
877 	}
878 	return want_cookie;
879 }
880 EXPORT_SYMBOL(tcp_syn_flood_action);
881 
882 /*
883  * Save and compile IPv4 options into the request_sock if needed.
884  */
885 static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
886 {
887 	const struct ip_options *opt = &(IPCB(skb)->opt);
888 	struct ip_options_rcu *dopt = NULL;
889 
890 	if (opt && opt->optlen) {
891 		int opt_size = sizeof(*dopt) + opt->optlen;
892 
893 		dopt = kmalloc(opt_size, GFP_ATOMIC);
894 		if (dopt) {
895 			if (ip_options_echo(&dopt->opt, skb)) {
896 				kfree(dopt);
897 				dopt = NULL;
898 			}
899 		}
900 	}
901 	return dopt;
902 }
903 
904 #ifdef CONFIG_TCP_MD5SIG
905 /*
906  * RFC2385 MD5 checksumming requires a mapping of
907  * IP address->MD5 Key.
908  * We need to maintain these in the sk structure.
909  */
910 
911 /* Find the Key structure for an address.  */
912 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
913 					 const union tcp_md5_addr *addr,
914 					 int family)
915 {
916 	struct tcp_sock *tp = tcp_sk(sk);
917 	struct tcp_md5sig_key *key;
918 	unsigned int size = sizeof(struct in_addr);
919 	struct tcp_md5sig_info *md5sig;
920 
921 	/* caller either holds rcu_read_lock() or socket lock */
922 	md5sig = rcu_dereference_check(tp->md5sig_info,
923 				       sock_owned_by_user(sk) ||
924 				       lockdep_is_held(&sk->sk_lock.slock));
925 	if (!md5sig)
926 		return NULL;
927 #if IS_ENABLED(CONFIG_IPV6)
928 	if (family == AF_INET6)
929 		size = sizeof(struct in6_addr);
930 #endif
931 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
932 		if (key->family != family)
933 			continue;
934 		if (!memcmp(&key->addr, addr, size))
935 			return key;
936 	}
937 	return NULL;
938 }
939 EXPORT_SYMBOL(tcp_md5_do_lookup);
940 
941 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
942 					 struct sock *addr_sk)
943 {
944 	union tcp_md5_addr *addr;
945 
946 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
947 	return tcp_md5_do_lookup(sk, addr, AF_INET);
948 }
949 EXPORT_SYMBOL(tcp_v4_md5_lookup);
950 
951 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
952 						      struct request_sock *req)
953 {
954 	union tcp_md5_addr *addr;
955 
956 	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
957 	return tcp_md5_do_lookup(sk, addr, AF_INET);
958 }
959 
960 /* This can be called on a newly created socket, from other files */
961 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
962 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
963 {
964 	/* Add Key to the list */
965 	struct tcp_md5sig_key *key;
966 	struct tcp_sock *tp = tcp_sk(sk);
967 	struct tcp_md5sig_info *md5sig;
968 
969 	key = tcp_md5_do_lookup(sk, addr, family);
970 	if (key) {
971 		/* Pre-existing entry - just update that one. */
972 		memcpy(key->key, newkey, newkeylen);
973 		key->keylen = newkeylen;
974 		return 0;
975 	}
976 
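	/* No key for this peer yet: allocate the md5sig_info lazily on first
	 * use and publish it with rcu_assign_pointer() so lockless readers
	 * always see a fully initialised list. */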
977 	md5sig = rcu_dereference_protected(tp->md5sig_info,
978 					   sock_owned_by_user(sk));
979 	if (!md5sig) {
980 		md5sig = kmalloc(sizeof(*md5sig), gfp);
981 		if (!md5sig)
982 			return -ENOMEM;
983 
984 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
985 		INIT_HLIST_HEAD(&md5sig->head);
986 		rcu_assign_pointer(tp->md5sig_info, md5sig);
987 	}
988 
989 	key = sock_kmalloc(sk, sizeof(*key), gfp);
990 	if (!key)
991 		return -ENOMEM;
992 	if (!tcp_alloc_md5sig_pool()) {
993 		sock_kfree_s(sk, key, sizeof(*key));
994 		return -ENOMEM;
995 	}
996 
997 	memcpy(key->key, newkey, newkeylen);
998 	key->keylen = newkeylen;
999 	key->family = family;
1000 	memcpy(&key->addr, addr,
1001 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1002 				      sizeof(struct in_addr));
1003 	hlist_add_head_rcu(&key->node, &md5sig->head);
1004 	return 0;
1005 }
1006 EXPORT_SYMBOL(tcp_md5_do_add);
1007 
1008 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1009 {
1010 	struct tcp_md5sig_key *key;
1011 
1012 	key = tcp_md5_do_lookup(sk, addr, family);
1013 	if (!key)
1014 		return -ENOENT;
1015 	hlist_del_rcu(&key->node);
1016 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1017 	kfree_rcu(key, rcu);
1018 	return 0;
1019 }
1020 EXPORT_SYMBOL(tcp_md5_do_del);
1021 
1022 static void tcp_clear_md5_list(struct sock *sk)
1023 {
1024 	struct tcp_sock *tp = tcp_sk(sk);
1025 	struct tcp_md5sig_key *key;
1026 	struct hlist_node *n;
1027 	struct tcp_md5sig_info *md5sig;
1028 
1029 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1030 
1031 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1032 		hlist_del_rcu(&key->node);
1033 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1034 		kfree_rcu(key, rcu);
1035 	}
1036 }
1037 
1038 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1039 				 int optlen)
1040 {
1041 	struct tcp_md5sig cmd;
1042 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1043 
1044 	if (optlen < sizeof(cmd))
1045 		return -EINVAL;
1046 
1047 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1048 		return -EFAULT;
1049 
1050 	if (sin->sin_family != AF_INET)
1051 		return -EINVAL;
1052 
1053 	if (!cmd.tcpm_keylen)
1054 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1055 				      AF_INET);
1056 
1057 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1058 		return -EINVAL;
1059 
1060 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1061 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1062 			      GFP_KERNEL);
1063 }
1064 
1065 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1066 					__be32 daddr, __be32 saddr, int nbytes)
1067 {
1068 	struct tcp4_pseudohdr *bp;
1069 	struct scatterlist sg;
1070 
1071 	bp = &hp->md5_blk.ip4;
1072 
1073 	/*
1074 	 * 1. the TCP pseudo-header (in the order: source IP address,
1075 	 * destination IP address, zero-padded protocol number, and
1076 	 * segment length)
1077 	 */
1078 	bp->saddr = saddr;
1079 	bp->daddr = daddr;
1080 	bp->pad = 0;
1081 	bp->protocol = IPPROTO_TCP;
1082 	bp->len = cpu_to_be16(nbytes);
1083 
1084 	sg_init_one(&sg, bp, sizeof(*bp));
1085 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1086 }
1087 
1088 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1089 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1090 {
1091 	struct tcp_md5sig_pool *hp;
1092 	struct hash_desc *desc;
1093 
1094 	hp = tcp_get_md5sig_pool();
1095 	if (!hp)
1096 		goto clear_hash_noput;
1097 	desc = &hp->md5_desc;
1098 
1099 	if (crypto_hash_init(desc))
1100 		goto clear_hash;
1101 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1102 		goto clear_hash;
1103 	if (tcp_md5_hash_header(hp, th))
1104 		goto clear_hash;
1105 	if (tcp_md5_hash_key(hp, key))
1106 		goto clear_hash;
1107 	if (crypto_hash_final(desc, md5_hash))
1108 		goto clear_hash;
1109 
1110 	tcp_put_md5sig_pool();
1111 	return 0;
1112 
1113 clear_hash:
1114 	tcp_put_md5sig_pool();
1115 clear_hash_noput:
1116 	memset(md5_hash, 0, 16);
1117 	return 1;
1118 }
1119 
1120 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1121 			const struct sock *sk, const struct request_sock *req,
1122 			const struct sk_buff *skb)
1123 {
1124 	struct tcp_md5sig_pool *hp;
1125 	struct hash_desc *desc;
1126 	const struct tcphdr *th = tcp_hdr(skb);
1127 	__be32 saddr, daddr;
1128 
1129 	if (sk) {
1130 		saddr = inet_sk(sk)->inet_saddr;
1131 		daddr = inet_sk(sk)->inet_daddr;
1132 	} else if (req) {
1133 		saddr = inet_rsk(req)->ir_loc_addr;
1134 		daddr = inet_rsk(req)->ir_rmt_addr;
1135 	} else {
1136 		const struct iphdr *iph = ip_hdr(skb);
1137 		saddr = iph->saddr;
1138 		daddr = iph->daddr;
1139 	}
1140 
1141 	hp = tcp_get_md5sig_pool();
1142 	if (!hp)
1143 		goto clear_hash_noput;
1144 	desc = &hp->md5_desc;
1145 
1146 	if (crypto_hash_init(desc))
1147 		goto clear_hash;
1148 
1149 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1150 		goto clear_hash;
1151 	if (tcp_md5_hash_header(hp, th))
1152 		goto clear_hash;
1153 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1154 		goto clear_hash;
1155 	if (tcp_md5_hash_key(hp, key))
1156 		goto clear_hash;
1157 	if (crypto_hash_final(desc, md5_hash))
1158 		goto clear_hash;
1159 
1160 	tcp_put_md5sig_pool();
1161 	return 0;
1162 
1163 clear_hash:
1164 	tcp_put_md5sig_pool();
1165 clear_hash_noput:
1166 	memset(md5_hash, 0, 16);
1167 	return 1;
1168 }
1169 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1170 
1171 static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
1172 				      const struct sk_buff *skb)
1173 {
1174 	/*
1175 	 * This gets called for each TCP segment that arrives
1176 	 * so we want to be efficient.
1177 	 * We have 3 drop cases:
1178 	 * o No MD5 hash and one expected.
1179 	 * o MD5 hash and we're not expecting one.
1180 	 * o MD5 hash and it's wrong.
1181 	 */
1182 	const __u8 *hash_location = NULL;
1183 	struct tcp_md5sig_key *hash_expected;
1184 	const struct iphdr *iph = ip_hdr(skb);
1185 	const struct tcphdr *th = tcp_hdr(skb);
1186 	int genhash;
1187 	unsigned char newhash[16];
1188 
1189 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1190 					  AF_INET);
1191 	hash_location = tcp_parse_md5sig_option(th);
1192 
1193 	/* We've parsed the options - do we have a hash? */
1194 	if (!hash_expected && !hash_location)
1195 		return false;
1196 
1197 	if (hash_expected && !hash_location) {
1198 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1199 		return true;
1200 	}
1201 
1202 	if (!hash_expected && hash_location) {
1203 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1204 		return true;
1205 	}
1206 
1207 	/* Okay, so both hash_expected and hash_location are set -
1208 	 * we need to calculate the checksum.
1209 	 */
1210 	genhash = tcp_v4_md5_hash_skb(newhash,
1211 				      hash_expected,
1212 				      NULL, NULL, skb);
1213 
1214 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1215 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1216 				     &iph->saddr, ntohs(th->source),
1217 				     &iph->daddr, ntohs(th->dest),
1218 				     genhash ? " tcp_v4_calc_md5_hash failed"
1219 				     : "");
1220 		return true;
1221 	}
1222 	return false;
1223 }
1224 
1225 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1226 {
1227 	bool ret;
1228 
1229 	rcu_read_lock();
1230 	ret = __tcp_v4_inbound_md5_hash(sk, skb);
1231 	rcu_read_unlock();
1232 
1233 	return ret;
1234 }
1235 
1236 #endif
1237 
1238 static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1239 			    struct sk_buff *skb)
1240 {
1241 	struct inet_request_sock *ireq = inet_rsk(req);
1242 
1243 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1244 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1245 	ireq->no_srccheck = inet_sk(sk)->transparent;
1246 	ireq->opt = tcp_v4_save_options(skb);
1247 }
1248 
1249 static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1250 					  const struct request_sock *req,
1251 					  bool *strict)
1252 {
1253 	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1254 
1255 	if (strict) {
1256 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1257 			*strict = true;
1258 		else
1259 			*strict = false;
1260 	}
1261 
1262 	return dst;
1263 }
1264 
1265 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1266 	.family		=	PF_INET,
1267 	.obj_size	=	sizeof(struct tcp_request_sock),
1268 	.rtx_syn_ack	=	tcp_rtx_synack,
1269 	.send_ack	=	tcp_v4_reqsk_send_ack,
1270 	.destructor	=	tcp_v4_reqsk_destructor,
1271 	.send_reset	=	tcp_v4_send_reset,
1272 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1273 };
1274 
1275 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1276 	.mss_clamp	=	TCP_MSS_DEFAULT,
1277 #ifdef CONFIG_TCP_MD5SIG
1278 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1279 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1280 #endif
1281 	.init_req	=	tcp_v4_init_req,
1282 #ifdef CONFIG_SYN_COOKIES
1283 	.cookie_init_seq =	cookie_v4_init_sequence,
1284 #endif
1285 	.route_req	=	tcp_v4_route_req,
1286 	.init_seq	=	tcp_v4_init_sequence,
1287 	.send_synack	=	tcp_v4_send_synack,
1288 	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
1289 };
1290 
1291 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1292 {
1293 	/* Never answer SYNs sent to broadcast or multicast addresses */
1294 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1295 		goto drop;
1296 
1297 	return tcp_conn_request(&tcp_request_sock_ops,
1298 				&tcp_request_sock_ipv4_ops, sk, skb);
1299 
1300 drop:
1301 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1302 	return 0;
1303 }
1304 EXPORT_SYMBOL(tcp_v4_conn_request);
1305 
1306 
1307 /*
1308  * The three way handshake has completed - we have received a valid final
1309  * ACK - now create the new socket.
1310  */
1311 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1312 				  struct request_sock *req,
1313 				  struct dst_entry *dst)
1314 {
1315 	struct inet_request_sock *ireq;
1316 	struct inet_sock *newinet;
1317 	struct tcp_sock *newtp;
1318 	struct sock *newsk;
1319 #ifdef CONFIG_TCP_MD5SIG
1320 	struct tcp_md5sig_key *key;
1321 #endif
1322 	struct ip_options_rcu *inet_opt;
1323 
1324 	if (sk_acceptq_is_full(sk))
1325 		goto exit_overflow;
1326 
1327 	newsk = tcp_create_openreq_child(sk, req, skb);
1328 	if (!newsk)
1329 		goto exit_nonewsk;
1330 
1331 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1332 	inet_sk_rx_dst_set(newsk, skb);
1333 
1334 	newtp		      = tcp_sk(newsk);
1335 	newinet		      = inet_sk(newsk);
1336 	ireq		      = inet_rsk(req);
1337 	newinet->inet_daddr   = ireq->ir_rmt_addr;
1338 	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1339 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1340 	inet_opt	      = ireq->opt;
1341 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1342 	ireq->opt	      = NULL;
1343 	newinet->mc_index     = inet_iif(skb);
1344 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1345 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1346 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1347 	inet_set_txhash(newsk);
1348 	if (inet_opt)
1349 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1350 	newinet->inet_id = newtp->write_seq ^ jiffies;
1351 
1352 	if (!dst) {
1353 		dst = inet_csk_route_child_sock(sk, newsk, req);
1354 		if (!dst)
1355 			goto put_and_exit;
1356 	} else {
1357 		/* syncookie case: see end of cookie_v4_check() */
1358 	}
1359 	sk_setup_caps(newsk, dst);
1360 
1361 	tcp_sync_mss(newsk, dst_mtu(dst));
1362 	newtp->advmss = dst_metric_advmss(dst);
1363 	if (tcp_sk(sk)->rx_opt.user_mss &&
1364 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1365 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1366 
1367 	tcp_initialize_rcv_mss(newsk);
1368 
1369 #ifdef CONFIG_TCP_MD5SIG
1370 	/* Copy over the MD5 key from the original socket */
1371 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1372 				AF_INET);
1373 	if (key != NULL) {
1374 		/*
1375 		 * We're using one, so create a matching key
1376 		 * on the newsk structure. If we fail to get
1377 		 * memory, then we end up not copying the key
1378 		 * across. Shucks.
1379 		 */
1380 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1381 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1382 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1383 	}
1384 #endif
1385 
1386 	if (__inet_inherit_port(sk, newsk) < 0)
1387 		goto put_and_exit;
1388 	__inet_hash_nolisten(newsk, NULL);
1389 
1390 	return newsk;
1391 
1392 exit_overflow:
1393 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1394 exit_nonewsk:
1395 	dst_release(dst);
1396 exit:
1397 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1398 	return NULL;
1399 put_and_exit:
1400 	inet_csk_prepare_forced_close(newsk);
1401 	tcp_done(newsk);
1402 	goto exit;
1403 }
1404 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1405 
1406 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1407 {
1408 	struct tcphdr *th = tcp_hdr(skb);
1409 	const struct iphdr *iph = ip_hdr(skb);
1410 	struct sock *nsk;
1411 	struct request_sock **prev;
1412 	/* Find possible connection requests. */
1413 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1414 						       iph->saddr, iph->daddr);
1415 	if (req)
1416 		return tcp_check_req(sk, skb, req, prev, false);
1417 
1418 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1419 			th->source, iph->daddr, th->dest, inet_iif(skb));
1420 
1421 	if (nsk) {
1422 		if (nsk->sk_state != TCP_TIME_WAIT) {
1423 			bh_lock_sock(nsk);
1424 			return nsk;
1425 		}
1426 		inet_twsk_put(inet_twsk(nsk));
1427 		return NULL;
1428 	}
1429 
1430 #ifdef CONFIG_SYN_COOKIES
1431 	if (!th->syn)
1432 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1433 #endif
1434 	return sk;
1435 }
1436 
1437 /* The socket must have its spinlock held when we get
1438  * here.
1439  *
1440  * We have a potential double-lock case here, so even when
1441  * doing backlog processing we use the BH locking scheme.
1442  * This is because we cannot sleep with the original spinlock
1443  * held.
1444  */
1445 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1446 {
1447 	struct sock *rsk;
1448 
1449 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1450 		struct dst_entry *dst = sk->sk_rx_dst;
1451 
1452 		sock_rps_save_rxhash(sk, skb);
1453 		if (dst) {
1454 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1455 			    dst->ops->check(dst, 0) == NULL) {
1456 				dst_release(dst);
1457 				sk->sk_rx_dst = NULL;
1458 			}
1459 		}
1460 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1461 		return 0;
1462 	}
1463 
1464 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1465 		goto csum_err;
1466 
1467 	if (sk->sk_state == TCP_LISTEN) {
1468 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1469 		if (!nsk)
1470 			goto discard;
1471 
1472 		if (nsk != sk) {
1473 			sock_rps_save_rxhash(nsk, skb);
1474 			if (tcp_child_process(sk, nsk, skb)) {
1475 				rsk = nsk;
1476 				goto reset;
1477 			}
1478 			return 0;
1479 		}
1480 	} else
1481 		sock_rps_save_rxhash(sk, skb);
1482 
1483 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1484 		rsk = sk;
1485 		goto reset;
1486 	}
1487 	return 0;
1488 
1489 reset:
1490 	tcp_v4_send_reset(rsk, skb);
1491 discard:
1492 	kfree_skb(skb);
1493 	/* Be careful here. If this function gets more complicated and
1494 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1495 	 * might be destroyed here. This current version compiles correctly,
1496 	 * but you have been warned.
1497 	 */
1498 	return 0;
1499 
1500 csum_err:
1501 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1502 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1503 	goto discard;
1504 }
1505 EXPORT_SYMBOL(tcp_v4_do_rcv);
1506 
1507 void tcp_v4_early_demux(struct sk_buff *skb)
1508 {
1509 	const struct iphdr *iph;
1510 	const struct tcphdr *th;
1511 	struct sock *sk;
1512 
1513 	if (skb->pkt_type != PACKET_HOST)
1514 		return;
1515 
1516 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1517 		return;
1518 
1519 	iph = ip_hdr(skb);
1520 	th = tcp_hdr(skb);
1521 
1522 	if (th->doff < sizeof(struct tcphdr) / 4)
1523 		return;
1524 
1525 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1526 				       iph->saddr, th->source,
1527 				       iph->daddr, ntohs(th->dest),
1528 				       skb->skb_iif);
1529 	if (sk) {
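		/* Found an established socket: attach it (and, if still valid,
		 * its cached input route) to the skb so the main receive path
		 * can skip the full lookup. */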
1530 		skb->sk = sk;
1531 		skb->destructor = sock_edemux;
1532 		if (sk->sk_state != TCP_TIME_WAIT) {
1533 			struct dst_entry *dst = sk->sk_rx_dst;
1534 
1535 			if (dst)
1536 				dst = dst_check(dst, 0);
1537 			if (dst &&
1538 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1539 				skb_dst_set_noref(skb, dst);
1540 		}
1541 	}
1542 }
1543 
1544 /* Packet is added to VJ-style prequeue for processing in process
1545  * context, if a reader task is waiting. Apparently, this exciting
1546  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1547  * failed somewhere. Latency? Burstiness? Well, at least now we will
1548  * see why it failed. 8)8)				  --ANK
1549  *
1550  */
1551 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1552 {
1553 	struct tcp_sock *tp = tcp_sk(sk);
1554 
1555 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
1556 		return false;
1557 
1558 	if (skb->len <= tcp_hdrlen(skb) &&
1559 	    skb_queue_len(&tp->ucopy.prequeue) == 0)
1560 		return false;
1561 
1562 	skb_dst_force(skb);
1563 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
1564 	tp->ucopy.memory += skb->truesize;
1565 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
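		/* The prequeue has outgrown the receive buffer: drain it by
		 * processing the queued segments here in softirq context,
		 * accounting them as prequeue drops. */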
1566 		struct sk_buff *skb1;
1567 
1568 		BUG_ON(sock_owned_by_user(sk));
1569 
1570 		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1571 			sk_backlog_rcv(sk, skb1);
1572 			NET_INC_STATS_BH(sock_net(sk),
1573 					 LINUX_MIB_TCPPREQUEUEDROPPED);
1574 		}
1575 
1576 		tp->ucopy.memory = 0;
1577 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1578 		wake_up_interruptible_sync_poll(sk_sleep(sk),
1579 					   POLLIN | POLLRDNORM | POLLRDBAND);
1580 		if (!inet_csk_ack_scheduled(sk))
1581 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1582 						  (3 * tcp_rto_min(sk)) / 4,
1583 						  TCP_RTO_MAX);
1584 	}
1585 	return true;
1586 }
1587 EXPORT_SYMBOL(tcp_prequeue);
1588 
1589 /*
1590  *	From tcp_input.c
1591  */
1592 
1593 int tcp_v4_rcv(struct sk_buff *skb)
1594 {
1595 	const struct iphdr *iph;
1596 	const struct tcphdr *th;
1597 	struct sock *sk;
1598 	int ret;
1599 	struct net *net = dev_net(skb->dev);
1600 
1601 	if (skb->pkt_type != PACKET_HOST)
1602 		goto discard_it;
1603 
1604 	/* Count it even if it's bad */
1605 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1606 
1607 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1608 		goto discard_it;
1609 
1610 	th = tcp_hdr(skb);
1611 
1612 	if (th->doff < sizeof(struct tcphdr) / 4)
1613 		goto bad_packet;
1614 	if (!pskb_may_pull(skb, th->doff * 4))
1615 		goto discard_it;
1616 
1617 	/* An explanation is required here, I think.
1618 	 * Packet length and doff are validated by header prediction,
1619 	 * provided the case of th->doff==0 is eliminated.
1620 	 * So, we defer the checks. */
1621 
1622 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1623 		goto csum_error;
1624 
1625 	th = tcp_hdr(skb);
1626 	iph = ip_hdr(skb);
1627 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1628 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1629 				    skb->len - th->doff * 4);
1630 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1631 	TCP_SKB_CB(skb)->when	 = 0;
1632 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1633 	TCP_SKB_CB(skb)->sacked	 = 0;
1634 
1635 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1636 	if (!sk)
1637 		goto no_tcp_socket;
1638 
1639 process:
1640 	if (sk->sk_state == TCP_TIME_WAIT)
1641 		goto do_time_wait;
1642 
1643 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1644 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1645 		goto discard_and_relse;
1646 	}
1647 
1648 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1649 		goto discard_and_relse;
1650 
1651 #ifdef CONFIG_TCP_MD5SIG
1652 	/*
1653 	 * We really want to reject the packet as early as possible
1654 	 * if:
1655 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1656 	 *  o There is an MD5 option and we're not expecting one
1657 	 */
1658 	if (tcp_v4_inbound_md5_hash(sk, skb))
1659 		goto discard_and_relse;
1660 #endif
1661 
1662 	nf_reset(skb);
1663 
1664 	if (sk_filter(sk, skb))
1665 		goto discard_and_relse;
1666 
1667 	sk_mark_napi_id(sk, skb);
1668 	skb->dev = NULL;
1669 
1670 	bh_lock_sock_nested(sk);
1671 	ret = 0;
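	/* Three delivery paths from here: process the segment directly under
	 * the socket lock, park it on the prequeue for the reading task, or,
	 * if the user owns the socket, append it to the (bounded) backlog. */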
1672 	if (!sock_owned_by_user(sk)) {
1673 #ifdef CONFIG_NET_DMA
1674 		struct tcp_sock *tp = tcp_sk(sk);
1675 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1676 			tp->ucopy.dma_chan = net_dma_find_channel();
1677 		if (tp->ucopy.dma_chan)
1678 			ret = tcp_v4_do_rcv(sk, skb);
1679 		else
1680 #endif
1681 		{
1682 			if (!tcp_prequeue(sk, skb))
1683 				ret = tcp_v4_do_rcv(sk, skb);
1684 		}
1685 	} else if (unlikely(sk_add_backlog(sk, skb,
1686 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1687 		bh_unlock_sock(sk);
1688 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1689 		goto discard_and_relse;
1690 	}
1691 	bh_unlock_sock(sk);
1692 
1693 	sock_put(sk);
1694 
1695 	return ret;
1696 
1697 no_tcp_socket:
1698 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1699 		goto discard_it;
1700 
1701 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1702 csum_error:
1703 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1704 bad_packet:
1705 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1706 	} else {
1707 		tcp_v4_send_reset(NULL, skb);
1708 	}
1709 
1710 discard_it:
1711 	/* Discard frame. */
1712 	kfree_skb(skb);
1713 	return 0;
1714 
1715 discard_and_relse:
1716 	sock_put(sk);
1717 	goto discard_it;
1718 
1719 do_time_wait:
1720 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1721 		inet_twsk_put(inet_twsk(sk));
1722 		goto discard_it;
1723 	}
1724 
1725 	if (skb->len < (th->doff << 2)) {
1726 		inet_twsk_put(inet_twsk(sk));
1727 		goto bad_packet;
1728 	}
1729 	if (tcp_checksum_complete(skb)) {
1730 		inet_twsk_put(inet_twsk(sk));
1731 		goto csum_error;
1732 	}
1733 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1734 	case TCP_TW_SYN: {
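		/* A new SYN hit a TIME-WAIT socket: if a listener still exists
		 * for this port, retire the timewait entry and restart
		 * processing with the listener so the connection can be
		 * reincarnated. */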
1735 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1736 							&tcp_hashinfo,
1737 							iph->saddr, th->source,
1738 							iph->daddr, th->dest,
1739 							inet_iif(skb));
1740 		if (sk2) {
1741 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1742 			inet_twsk_put(inet_twsk(sk));
1743 			sk = sk2;
1744 			goto process;
1745 		}
1746 		/* Fall through to ACK */
1747 	}
1748 	case TCP_TW_ACK:
1749 		tcp_v4_timewait_ack(sk, skb);
1750 		break;
1751 	case TCP_TW_RST:
1752 		goto no_tcp_socket;
1753 	case TCP_TW_SUCCESS:;
1754 	}
1755 	goto discard_it;
1756 }
1757 
1758 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1759 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1760 	.twsk_unique	= tcp_twsk_unique,
1761 	.twsk_destructor= tcp_twsk_destructor,
1762 };
1763 
1764 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1765 {
1766 	struct dst_entry *dst = skb_dst(skb);
1767 
1768 	dst_hold(dst);
1769 	sk->sk_rx_dst = dst;
1770 	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1771 }
1772 EXPORT_SYMBOL(inet_sk_rx_dst_set);
1773 
1774 const struct inet_connection_sock_af_ops ipv4_specific = {
1775 	.queue_xmit	   = ip_queue_xmit,
1776 	.send_check	   = tcp_v4_send_check,
1777 	.rebuild_header	   = inet_sk_rebuild_header,
1778 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1779 	.conn_request	   = tcp_v4_conn_request,
1780 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1781 	.net_header_len	   = sizeof(struct iphdr),
1782 	.setsockopt	   = ip_setsockopt,
1783 	.getsockopt	   = ip_getsockopt,
1784 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1785 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1786 	.bind_conflict	   = inet_csk_bind_conflict,
1787 #ifdef CONFIG_COMPAT
1788 	.compat_setsockopt = compat_ip_setsockopt,
1789 	.compat_getsockopt = compat_ip_getsockopt,
1790 #endif
1791 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1792 };
1793 EXPORT_SYMBOL(ipv4_specific);
1794 
1795 #ifdef CONFIG_TCP_MD5SIG
1796 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1797 	.md5_lookup		= tcp_v4_md5_lookup,
1798 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1799 	.md5_parse		= tcp_v4_parse_md5_keys,
1800 };
1801 #endif
1802 
1803 /* NOTE: A lot of things are set to zero explicitly by the call to
1804  *       sk_alloc(), so they need not be done here.
1805  */
1806 static int tcp_v4_init_sock(struct sock *sk)
1807 {
1808 	struct inet_connection_sock *icsk = inet_csk(sk);
1809 
1810 	tcp_init_sock(sk);
1811 
1812 	icsk->icsk_af_ops = &ipv4_specific;
1813 
1814 #ifdef CONFIG_TCP_MD5SIG
1815 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1816 #endif
1817 
1818 	return 0;
1819 }
1820 
1821 void tcp_v4_destroy_sock(struct sock *sk)
1822 {
1823 	struct tcp_sock *tp = tcp_sk(sk);
1824 
1825 	tcp_clear_xmit_timers(sk);
1826 
1827 	tcp_cleanup_congestion_control(sk);
1828 
1829 	/* Clean up the write buffer. */
1830 	tcp_write_queue_purge(sk);
1831 
1832 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1833 	__skb_queue_purge(&tp->out_of_order_queue);
1834 
1835 #ifdef CONFIG_TCP_MD5SIG
1836 	/* Clean up the MD5 key list, if any */
1837 	if (tp->md5sig_info) {
1838 		tcp_clear_md5_list(sk);
1839 		kfree_rcu(tp->md5sig_info, rcu);
1840 		tp->md5sig_info = NULL;
1841 	}
1842 #endif
1843 
1844 #ifdef CONFIG_NET_DMA
1845 	/* Cleans up our sk_async_wait_queue */
1846 	__skb_queue_purge(&sk->sk_async_wait_queue);
1847 #endif
1848 
1849 	/* Clean the prequeue; it really should be empty */
1850 	__skb_queue_purge(&tp->ucopy.prequeue);
1851 
1852 	/* Clean up a referenced TCP bind bucket. */
1853 	if (inet_csk(sk)->icsk_bind_hash)
1854 		inet_put_port(sk);
1855 
1856 	BUG_ON(tp->fastopen_rsk != NULL);
1857 
1858 	/* If the socket was aborted during the connect operation */
1859 	tcp_free_fastopen_req(tp);
1860 
1861 	sk_sockets_allocated_dec(sk);
1862 	sock_release_memcg(sk);
1863 }
1864 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1865 
1866 #ifdef CONFIG_PROC_FS
1867 /* Proc filesystem TCP sock list dumping. */
1868 
1869 /*
1870  * Get the next listener socket following cur.  If cur is NULL, get the
1871  * first socket starting from the bucket given in st->bucket; when
1872  * st->bucket is zero, the very first socket in the hash table is returned.
1873  */
1874 static void *listening_get_next(struct seq_file *seq, void *cur)
1875 {
1876 	struct inet_connection_sock *icsk;
1877 	struct hlist_nulls_node *node;
1878 	struct sock *sk = cur;
1879 	struct inet_listen_hashbucket *ilb;
1880 	struct tcp_iter_state *st = seq->private;
1881 	struct net *net = seq_file_net(seq);
1882 
1883 	if (!sk) {
1884 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1885 		spin_lock_bh(&ilb->lock);
1886 		sk = sk_nulls_head(&ilb->head);
1887 		st->offset = 0;
1888 		goto get_sk;
1889 	}
1890 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1891 	++st->num;
1892 	++st->offset;
1893 
1894 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1895 		struct request_sock *req = cur;
1896 
1897 		icsk = inet_csk(st->syn_wait_sk);
1898 		req = req->dl_next;
1899 		while (1) {
1900 			while (req) {
1901 				if (req->rsk_ops->family == st->family) {
1902 					cur = req;
1903 					goto out;
1904 				}
1905 				req = req->dl_next;
1906 			}
1907 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1908 				break;
1909 get_req:
1910 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1911 		}
1912 		sk	  = sk_nulls_next(st->syn_wait_sk);
1913 		st->state = TCP_SEQ_STATE_LISTENING;
1914 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1915 	} else {
1916 		icsk = inet_csk(sk);
1917 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1918 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1919 			goto start_req;
1920 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1921 		sk = sk_nulls_next(sk);
1922 	}
1923 get_sk:
1924 	sk_nulls_for_each_from(sk, node) {
1925 		if (!net_eq(sock_net(sk), net))
1926 			continue;
1927 		if (sk->sk_family == st->family) {
1928 			cur = sk;
1929 			goto out;
1930 		}
1931 		icsk = inet_csk(sk);
1932 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1933 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1934 start_req:
1935 			st->uid		= sock_i_uid(sk);
1936 			st->syn_wait_sk = sk;
1937 			st->state	= TCP_SEQ_STATE_OPENREQ;
1938 			st->sbucket	= 0;
1939 			goto get_req;
1940 		}
1941 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1942 	}
1943 	spin_unlock_bh(&ilb->lock);
1944 	st->offset = 0;
1945 	if (++st->bucket < INET_LHTABLE_SIZE) {
1946 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1947 		spin_lock_bh(&ilb->lock);
1948 		sk = sk_nulls_head(&ilb->head);
1949 		goto get_sk;
1950 	}
1951 	cur = NULL;
1952 out:
1953 	return cur;
1954 }
1955 
1956 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1957 {
1958 	struct tcp_iter_state *st = seq->private;
1959 	void *rc;
1960 
1961 	st->bucket = 0;
1962 	st->offset = 0;
1963 	rc = listening_get_next(seq, NULL);
1964 
1965 	while (rc && *pos) {
1966 		rc = listening_get_next(seq, rc);
1967 		--*pos;
1968 	}
1969 	return rc;
1970 }
1971 
1972 static inline bool empty_bucket(const struct tcp_iter_state *st)
1973 {
1974 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1975 }
1976 
1977 /*
1978  * Get first established socket starting from bucket given in st->bucket.
1979  * If st->bucket is zero, the very first socket in the hash is returned.
1980  */
1981 static void *established_get_first(struct seq_file *seq)
1982 {
1983 	struct tcp_iter_state *st = seq->private;
1984 	struct net *net = seq_file_net(seq);
1985 	void *rc = NULL;
1986 
1987 	st->offset = 0;
1988 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1989 		struct sock *sk;
1990 		struct hlist_nulls_node *node;
1991 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1992 
1993 		/* Lockless fast path for the common case of empty buckets */
1994 		if (empty_bucket(st))
1995 			continue;
1996 
1997 		spin_lock_bh(lock);
1998 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1999 			if (sk->sk_family != st->family ||
2000 			    !net_eq(sock_net(sk), net)) {
2001 				continue;
2002 			}
2003 			rc = sk;
2004 			goto out;
2005 		}
2006 		spin_unlock_bh(lock);
2007 	}
2008 out:
2009 	return rc;
2010 }
2011 
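/*
 * Advance to the next socket in the current ehash chain; when the chain is
 * exhausted, drop its lock and continue with the first entry of the next
 * non-empty bucket.
 */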
2012 static void *established_get_next(struct seq_file *seq, void *cur)
2013 {
2014 	struct sock *sk = cur;
2015 	struct hlist_nulls_node *node;
2016 	struct tcp_iter_state *st = seq->private;
2017 	struct net *net = seq_file_net(seq);
2018 
2019 	++st->num;
2020 	++st->offset;
2021 
2022 	sk = sk_nulls_next(sk);
2023 
2024 	sk_nulls_for_each_from(sk, node) {
2025 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2026 			return sk;
2027 	}
2028 
2029 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2030 	++st->bucket;
2031 	return established_get_first(seq);
2032 }
2033 
2034 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2035 {
2036 	struct tcp_iter_state *st = seq->private;
2037 	void *rc;
2038 
2039 	st->bucket = 0;
2040 	rc = established_get_first(seq);
2041 
2042 	while (rc && pos) {
2043 		rc = established_get_next(seq, rc);
2044 		--pos;
2045 	}
2046 	return rc;
2047 }
2048 
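/*
 * Seek to absolute position pos, walking the listening sockets first and
 * then the established/TIME_WAIT hash.
 */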
2049 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2050 {
2051 	void *rc;
2052 	struct tcp_iter_state *st = seq->private;
2053 
2054 	st->state = TCP_SEQ_STATE_LISTENING;
2055 	rc	  = listening_get_idx(seq, &pos);
2056 
2057 	if (!rc) {
2058 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2059 		rc	  = established_get_idx(seq, pos);
2060 	}
2061 
2062 	return rc;
2063 }
2064 
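/*
 * Fast path for resuming a partially read dump: re-walk only the bucket
 * recorded in st->bucket, skipping st->offset entries, instead of starting
 * the whole iteration from scratch.
 */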
2065 static void *tcp_seek_last_pos(struct seq_file *seq)
2066 {
2067 	struct tcp_iter_state *st = seq->private;
2068 	int offset = st->offset;
2069 	int orig_num = st->num;
2070 	void *rc = NULL;
2071 
2072 	switch (st->state) {
2073 	case TCP_SEQ_STATE_OPENREQ:
2074 	case TCP_SEQ_STATE_LISTENING:
2075 		if (st->bucket >= INET_LHTABLE_SIZE)
2076 			break;
2077 		st->state = TCP_SEQ_STATE_LISTENING;
2078 		rc = listening_get_next(seq, NULL);
2079 		while (offset-- && rc)
2080 			rc = listening_get_next(seq, rc);
2081 		if (rc)
2082 			break;
2083 		st->bucket = 0;
2084 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2085 		/* Fallthrough */
2086 	case TCP_SEQ_STATE_ESTABLISHED:
2087 		if (st->bucket > tcp_hashinfo.ehash_mask)
2088 			break;
2089 		rc = established_get_first(seq);
2090 		while (offset-- && rc)
2091 			rc = established_get_next(seq, rc);
2092 	}
2093 
2094 	st->num = orig_num;
2095 
2096 	return rc;
2097 }
2098 
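/*
 * seq_file ->start(): try to resume at the last position handed out; on
 * failure (or a rewind) restart the full walk from the first bucket.
 */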
2099 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2100 {
2101 	struct tcp_iter_state *st = seq->private;
2102 	void *rc;
2103 
2104 	if (*pos && *pos == st->last_pos) {
2105 		rc = tcp_seek_last_pos(seq);
2106 		if (rc)
2107 			goto out;
2108 	}
2109 
2110 	st->state = TCP_SEQ_STATE_LISTENING;
2111 	st->num = 0;
2112 	st->bucket = 0;
2113 	st->offset = 0;
2114 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2115 
2116 out:
2117 	st->last_pos = *pos;
2118 	return rc;
2119 }
2120 
2121 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2122 {
2123 	struct tcp_iter_state *st = seq->private;
2124 	void *rc = NULL;
2125 
2126 	if (v == SEQ_START_TOKEN) {
2127 		rc = tcp_get_idx(seq, 0);
2128 		goto out;
2129 	}
2130 
2131 	switch (st->state) {
2132 	case TCP_SEQ_STATE_OPENREQ:
2133 	case TCP_SEQ_STATE_LISTENING:
2134 		rc = listening_get_next(seq, v);
2135 		if (!rc) {
2136 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2137 			st->bucket = 0;
2138 			st->offset = 0;
2139 			rc	  = established_get_first(seq);
2140 		}
2141 		break;
2142 	case TCP_SEQ_STATE_ESTABLISHED:
2143 		rc = established_get_next(seq, v);
2144 		break;
2145 	}
2146 out:
2147 	++*pos;
2148 	st->last_pos = *pos;
2149 	return rc;
2150 }
2151 
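/*
 * seq_file ->stop(): release whatever lock the iterator still holds for the
 * current state (SYN queue, listening bucket or established bucket).
 */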
2152 static void tcp_seq_stop(struct seq_file *seq, void *v)
2153 {
2154 	struct tcp_iter_state *st = seq->private;
2155 
2156 	switch (st->state) {
2157 	case TCP_SEQ_STATE_OPENREQ:
2158 		if (v) {
2159 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2160 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2161 		}
2162 	case TCP_SEQ_STATE_LISTENING:
2163 		if (v != SEQ_START_TOKEN)
2164 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2165 		break;
2166 	case TCP_SEQ_STATE_ESTABLISHED:
2167 		if (v)
2168 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2169 		break;
2170 	}
2171 }
2172 
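/*
 * Open callback shared by the per-family /proc entries; it records the
 * address family and resets the resume position in the iterator state.
 */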
2173 int tcp_seq_open(struct inode *inode, struct file *file)
2174 {
2175 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2176 	struct tcp_iter_state *s;
2177 	int err;
2178 
2179 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2180 			  sizeof(struct tcp_iter_state));
2181 	if (err < 0)
2182 		return err;
2183 
2184 	s = ((struct seq_file *)file->private_data)->private;
2185 	s->family		= afinfo->family;
2186 	s->last_pos 		= 0;
2187 	return 0;
2188 }
2189 EXPORT_SYMBOL(tcp_seq_open);
2190 
2191 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2192 {
2193 	int rc = 0;
2194 	struct proc_dir_entry *p;
2195 
2196 	afinfo->seq_ops.start		= tcp_seq_start;
2197 	afinfo->seq_ops.next		= tcp_seq_next;
2198 	afinfo->seq_ops.stop		= tcp_seq_stop;
2199 
2200 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2201 			     afinfo->seq_fops, afinfo);
2202 	if (!p)
2203 		rc = -ENOMEM;
2204 	return rc;
2205 }
2206 EXPORT_SYMBOL(tcp_proc_register);
2207 
2208 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2209 {
2210 	remove_proc_entry(afinfo->name, net->proc_net);
2211 }
2212 EXPORT_SYMBOL(tcp_proc_unregister);
2213 
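/*
 * Format one SYN_RECV request socket as a /proc/net/tcp line; the state is
 * reported as TCP_SYN_RECV and the expire timer as the only active timer.
 */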
2214 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2215 			 struct seq_file *f, int i, kuid_t uid)
2216 {
2217 	const struct inet_request_sock *ireq = inet_rsk(req);
2218 	long delta = req->expires - jiffies;
2219 
2220 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2221 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2222 		i,
2223 		ireq->ir_loc_addr,
2224 		ntohs(inet_sk(sk)->inet_sport),
2225 		ireq->ir_rmt_addr,
2226 		ntohs(ireq->ir_rmt_port),
2227 		TCP_SYN_RECV,
2228 		0, 0, /* could print option size, but that is af dependent. */
2229 		1,    /* timers active (only the expire timer) */
2230 		jiffies_delta_to_clock_t(delta),
2231 		req->num_timeout,
2232 		from_kuid_munged(seq_user_ns(f), uid),
2233 		0,  /* non-standard timer */
2234 		0, /* open_requests have no inode */
2235 		atomic_read(&sk->sk_refcnt),
2236 		req);
2237 }
2238 
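/*
 * Format one full TCP socket. The timer column uses the /proc/net/tcp
 * convention: 1 retransmit/loss probe, 2 keepalive (sk_timer),
 * 4 zero-window probe, 0 none.
 */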
2239 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2240 {
2241 	int timer_active;
2242 	unsigned long timer_expires;
2243 	const struct tcp_sock *tp = tcp_sk(sk);
2244 	const struct inet_connection_sock *icsk = inet_csk(sk);
2245 	const struct inet_sock *inet = inet_sk(sk);
2246 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2247 	__be32 dest = inet->inet_daddr;
2248 	__be32 src = inet->inet_rcv_saddr;
2249 	__u16 destp = ntohs(inet->inet_dport);
2250 	__u16 srcp = ntohs(inet->inet_sport);
2251 	int rx_queue;
2252 
2253 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2254 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2255 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2256 		timer_active	= 1;
2257 		timer_expires	= icsk->icsk_timeout;
2258 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2259 		timer_active	= 4;
2260 		timer_expires	= icsk->icsk_timeout;
2261 	} else if (timer_pending(&sk->sk_timer)) {
2262 		timer_active	= 2;
2263 		timer_expires	= sk->sk_timer.expires;
2264 	} else {
2265 		timer_active	= 0;
2266 		timer_expires = jiffies;
2267 	}
2268 
2269 	if (sk->sk_state == TCP_LISTEN)
2270 		rx_queue = sk->sk_ack_backlog;
2271 	else
2272 		/*
2273 		 * Because we don't lock the socket, we might find a transient negative value.
2274 		 */
2275 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2276 
2277 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2278 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2279 		i, src, srcp, dest, destp, sk->sk_state,
2280 		tp->write_seq - tp->snd_una,
2281 		rx_queue,
2282 		timer_active,
2283 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2284 		icsk->icsk_retransmits,
2285 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2286 		icsk->icsk_probes_out,
2287 		sock_i_ino(sk),
2288 		atomic_read(&sk->sk_refcnt), sk,
2289 		jiffies_to_clock_t(icsk->icsk_rto),
2290 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2291 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2292 		tp->snd_cwnd,
2293 		sk->sk_state == TCP_LISTEN ?
2294 		    (fastopenq ? fastopenq->max_qlen : 0) :
2295 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2296 }
2297 
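/*
 * Format a TIME_WAIT socket; the remaining TIME_WAIT period is reported as
 * timer type 3 and most other fields are fixed at zero.
 */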
2298 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2299 			       struct seq_file *f, int i)
2300 {
2301 	__be32 dest, src;
2302 	__u16 destp, srcp;
2303 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
2304 
2305 	dest  = tw->tw_daddr;
2306 	src   = tw->tw_rcv_saddr;
2307 	destp = ntohs(tw->tw_dport);
2308 	srcp  = ntohs(tw->tw_sport);
2309 
2310 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2311 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2312 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2313 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2314 		atomic_read(&tw->tw_refcnt), tw);
2315 }
2316 
2317 #define TMPSZ 150
2318 
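/*
 * show() callback: every record is padded to TMPSZ - 1 characters so that
 * userspace sees fixed-width lines; dispatch on the iterator state to the
 * appropriate formatting helper.
 */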
2319 static int tcp4_seq_show(struct seq_file *seq, void *v)
2320 {
2321 	struct tcp_iter_state *st;
2322 	struct sock *sk = v;
2323 
2324 	seq_setwidth(seq, TMPSZ - 1);
2325 	if (v == SEQ_START_TOKEN) {
2326 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2327 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2328 			   "inode");
2329 		goto out;
2330 	}
2331 	st = seq->private;
2332 
2333 	switch (st->state) {
2334 	case TCP_SEQ_STATE_LISTENING:
2335 	case TCP_SEQ_STATE_ESTABLISHED:
2336 		if (sk->sk_state == TCP_TIME_WAIT)
2337 			get_timewait4_sock(v, seq, st->num);
2338 		else
2339 			get_tcp4_sock(v, seq, st->num);
2340 		break;
2341 	case TCP_SEQ_STATE_OPENREQ:
2342 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
2343 		break;
2344 	}
2345 out:
2346 	seq_pad(seq, '\n');
2347 	return 0;
2348 }
2349 
2350 static const struct file_operations tcp_afinfo_seq_fops = {
2351 	.owner   = THIS_MODULE,
2352 	.open    = tcp_seq_open,
2353 	.read    = seq_read,
2354 	.llseek  = seq_lseek,
2355 	.release = seq_release_net
2356 };
2357 
2358 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2359 	.name		= "tcp",
2360 	.family		= AF_INET,
2361 	.seq_fops	= &tcp_afinfo_seq_fops,
2362 	.seq_ops	= {
2363 		.show		= tcp4_seq_show,
2364 	},
2365 };
2366 
2367 static int __net_init tcp4_proc_init_net(struct net *net)
2368 {
2369 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2370 }
2371 
2372 static void __net_exit tcp4_proc_exit_net(struct net *net)
2373 {
2374 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2375 }
2376 
2377 static struct pernet_operations tcp4_net_ops = {
2378 	.init = tcp4_proc_init_net,
2379 	.exit = tcp4_proc_exit_net,
2380 };
2381 
2382 int __init tcp4_proc_init(void)
2383 {
2384 	return register_pernet_subsys(&tcp4_net_ops);
2385 }
2386 
2387 void tcp4_proc_exit(void)
2388 {
2389 	unregister_pernet_subsys(&tcp4_net_ops);
2390 }
2391 #endif /* CONFIG_PROC_FS */
2392 
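/*
 * The IPv4 TCP protocol descriptor: hooks the TCP implementation into the
 * generic inet socket layer (socket calls, hashing, memory accounting).
 */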
2393 struct proto tcp_prot = {
2394 	.name			= "TCP",
2395 	.owner			= THIS_MODULE,
2396 	.close			= tcp_close,
2397 	.connect		= tcp_v4_connect,
2398 	.disconnect		= tcp_disconnect,
2399 	.accept			= inet_csk_accept,
2400 	.ioctl			= tcp_ioctl,
2401 	.init			= tcp_v4_init_sock,
2402 	.destroy		= tcp_v4_destroy_sock,
2403 	.shutdown		= tcp_shutdown,
2404 	.setsockopt		= tcp_setsockopt,
2405 	.getsockopt		= tcp_getsockopt,
2406 	.recvmsg		= tcp_recvmsg,
2407 	.sendmsg		= tcp_sendmsg,
2408 	.sendpage		= tcp_sendpage,
2409 	.backlog_rcv		= tcp_v4_do_rcv,
2410 	.release_cb		= tcp_release_cb,
2411 	.hash			= inet_hash,
2412 	.unhash			= inet_unhash,
2413 	.get_port		= inet_csk_get_port,
2414 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2415 	.stream_memory_free	= tcp_stream_memory_free,
2416 	.sockets_allocated	= &tcp_sockets_allocated,
2417 	.orphan_count		= &tcp_orphan_count,
2418 	.memory_allocated	= &tcp_memory_allocated,
2419 	.memory_pressure	= &tcp_memory_pressure,
2420 	.sysctl_mem		= sysctl_tcp_mem,
2421 	.sysctl_wmem		= sysctl_tcp_wmem,
2422 	.sysctl_rmem		= sysctl_tcp_rmem,
2423 	.max_header		= MAX_TCP_HEADER,
2424 	.obj_size		= sizeof(struct tcp_sock),
2425 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2426 	.twsk_prot		= &tcp_timewait_sock_ops,
2427 	.rsk_prot		= &tcp_request_sock_ops,
2428 	.h.hashinfo		= &tcp_hashinfo,
2429 	.no_autobind		= true,
2430 #ifdef CONFIG_COMPAT
2431 	.compat_setsockopt	= compat_tcp_setsockopt,
2432 	.compat_getsockopt	= compat_tcp_getsockopt,
2433 #endif
2434 #ifdef CONFIG_MEMCG_KMEM
2435 	.init_cgroup		= tcp_init_cgroup,
2436 	.destroy_cgroup		= tcp_destroy_cgroup,
2437 	.proto_cgroup		= tcp_proto_cgroup,
2438 #endif
2439 };
2440 EXPORT_SYMBOL(tcp_prot);
2441 
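/*
 * Per-namespace init: sysctl_tcp_ecn defaults to 2, i.e. accept ECN when
 * the peer requests it but do not request it on outgoing connections.
 */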
2442 static int __net_init tcp_sk_init(struct net *net)
2443 {
2444 	net->ipv4.sysctl_tcp_ecn = 2;
2445 	return 0;
2446 }
2447 
2448 static void __net_exit tcp_sk_exit(struct net *net)
2449 {
2450 }
2451 
2452 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2453 {
2454 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2455 }
2456 
2457 static struct pernet_operations __net_initdata tcp_sk_ops = {
2458 	.init	    = tcp_sk_init,
2459 	.exit	    = tcp_sk_exit,
2460 	.exit_batch = tcp_sk_exit_batch,
2461 };
2462 
2463 void __init tcp_v4_init(void)
2464 {
2465 	inet_hashinfo_init(&tcp_hashinfo);
2466 	if (register_pernet_subsys(&tcp_sk_ops))
2467 		panic("Failed to create the TCP control socket.\n");
2468 }
2469