xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision 95e9fd10)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78 
79 #include <linux/inet.h>
80 #include <linux/ipv6.h>
81 #include <linux/stddef.h>
82 #include <linux/proc_fs.h>
83 #include <linux/seq_file.h>
84 
85 #include <linux/crypto.h>
86 #include <linux/scatterlist.h>
87 
88 int sysctl_tcp_tw_reuse __read_mostly;
89 int sysctl_tcp_low_latency __read_mostly;
90 EXPORT_SYMBOL(sysctl_tcp_low_latency);
91 
92 
93 #ifdef CONFIG_TCP_MD5SIG
94 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96 #endif
97 
98 struct inet_hashinfo tcp_hashinfo;
99 EXPORT_SYMBOL(tcp_hashinfo);
100 
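/* Pick the initial sequence number for a connection from the secure
 * (hashed) ISN generator, keyed by the packet's address/port 4-tuple.
 */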
101 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102 {
103 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 					  ip_hdr(skb)->saddr,
105 					  tcp_hdr(skb)->dest,
106 					  tcp_hdr(skb)->source);
107 }
108 
109 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110 {
111 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 	struct tcp_sock *tp = tcp_sk(sk);
113 
114 	/* With PAWS, it is safe from the viewpoint
115 	   of data integrity. Even without PAWS it is safe provided sequence
116 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117 
118 	   Actually, the idea is close to VJ's, only the timestamp cache is
119 	   held not per host but per port pair, and the TW bucket is used as
120 	   the state holder.
121 
122 	   If the TW bucket has already been destroyed we fall back to VJ's
123 	   scheme and use the initial timestamp retrieved from the peer table.
124 	 */
125 	if (tcptw->tw_ts_recent_stamp &&
126 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
127 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
128 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 		if (tp->write_seq == 0)
130 			tp->write_seq = 1;
131 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
132 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 		sock_hold(sktw);
134 		return 1;
135 	}
136 
137 	return 0;
138 }
139 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140 
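/* Repair-mode connect (tp->repair set, e.g. for checkpoint/restore):
 * initialize connection state and move straight to the established
 * state without transmitting a SYN or waiting for a handshake.
 */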
141 static int tcp_repair_connect(struct sock *sk)
142 {
143 	tcp_connect_init(sk);
144 	tcp_finish_connect(sk, NULL);
145 
146 	return 0;
147 }
148 
149 /* This will initiate an outgoing connection. */
150 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151 {
152 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
153 	struct inet_sock *inet = inet_sk(sk);
154 	struct tcp_sock *tp = tcp_sk(sk);
155 	__be16 orig_sport, orig_dport;
156 	__be32 daddr, nexthop;
157 	struct flowi4 *fl4;
158 	struct rtable *rt;
159 	int err;
160 	struct ip_options_rcu *inet_opt;
161 
162 	if (addr_len < sizeof(struct sockaddr_in))
163 		return -EINVAL;
164 
165 	if (usin->sin_family != AF_INET)
166 		return -EAFNOSUPPORT;
167 
168 	nexthop = daddr = usin->sin_addr.s_addr;
169 	inet_opt = rcu_dereference_protected(inet->inet_opt,
170 					     sock_owned_by_user(sk));
171 	if (inet_opt && inet_opt->opt.srr) {
172 		if (!daddr)
173 			return -EINVAL;
174 		nexthop = inet_opt->opt.faddr;
175 	}
176 
177 	orig_sport = inet->inet_sport;
178 	orig_dport = usin->sin_port;
179 	fl4 = &inet->cork.fl.u.ip4;
180 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
181 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
182 			      IPPROTO_TCP,
183 			      orig_sport, orig_dport, sk, true);
184 	if (IS_ERR(rt)) {
185 		err = PTR_ERR(rt);
186 		if (err == -ENETUNREACH)
187 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188 		return err;
189 	}
190 
191 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
192 		ip_rt_put(rt);
193 		return -ENETUNREACH;
194 	}
195 
196 	if (!inet_opt || !inet_opt->opt.srr)
197 		daddr = fl4->daddr;
198 
199 	if (!inet->inet_saddr)
200 		inet->inet_saddr = fl4->saddr;
201 	inet->inet_rcv_saddr = inet->inet_saddr;
202 
203 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
204 		/* Reset inherited state */
205 		tp->rx_opt.ts_recent	   = 0;
206 		tp->rx_opt.ts_recent_stamp = 0;
207 		if (likely(!tp->repair))
208 			tp->write_seq	   = 0;
209 	}
210 
211 	if (tcp_death_row.sysctl_tw_recycle &&
212 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
213 		tcp_fetch_timewait_stamp(sk, &rt->dst);
214 
215 	inet->inet_dport = usin->sin_port;
216 	inet->inet_daddr = daddr;
217 
218 	inet_csk(sk)->icsk_ext_hdr_len = 0;
219 	if (inet_opt)
220 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
221 
222 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
223 
224 	/* Socket identity is still unknown (sport may be zero).
225 	 * However we set the state to SYN-SENT and, without releasing the
226 	 * socket lock, select a source port, enter ourselves into the hash
227 	 * tables and complete initialization after this.
228 	 */
229 	tcp_set_state(sk, TCP_SYN_SENT);
230 	err = inet_hash_connect(&tcp_death_row, sk);
231 	if (err)
232 		goto failure;
233 
234 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
235 			       inet->inet_sport, inet->inet_dport, sk);
236 	if (IS_ERR(rt)) {
237 		err = PTR_ERR(rt);
238 		rt = NULL;
239 		goto failure;
240 	}
241 	/* OK, now commit destination to socket.  */
242 	sk->sk_gso_type = SKB_GSO_TCPV4;
243 	sk_setup_caps(sk, &rt->dst);
244 
245 	if (!tp->write_seq && likely(!tp->repair))
246 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
247 							   inet->inet_daddr,
248 							   inet->inet_sport,
249 							   usin->sin_port);
250 
251 	inet->inet_id = tp->write_seq ^ jiffies;
252 
253 	if (likely(!tp->repair))
254 		err = tcp_connect(sk);
255 	else
256 		err = tcp_repair_connect(sk);
257 
258 	rt = NULL;
259 	if (err)
260 		goto failure;
261 
262 	return 0;
263 
264 failure:
265 	/*
266 	 * This unhashes the socket and releases the local port,
267 	 * if necessary.
268 	 */
269 	tcp_set_state(sk, TCP_CLOSE);
270 	ip_rt_put(rt);
271 	sk->sk_route_caps = 0;
272 	inet->inet_dport = 0;
273 	return err;
274 }
275 EXPORT_SYMBOL(tcp_v4_connect);
276 
277 /*
278  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
279  * It can be called through tcp_release_cb() if the socket was owned by the
280  * user at the time tcp_v4_err() was called to handle the ICMP message.
281  */
282 static void tcp_v4_mtu_reduced(struct sock *sk)
283 {
284 	struct dst_entry *dst;
285 	struct inet_sock *inet = inet_sk(sk);
286 	u32 mtu = tcp_sk(sk)->mtu_info;
287 
288 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
289 	 * sent out by Linux are always < 576 bytes so they should go through
290 	 * unfragmented).
291 	 */
292 	if (sk->sk_state == TCP_LISTEN)
293 		return;
294 
295 	dst = inet_csk_update_pmtu(sk, mtu);
296 	if (!dst)
297 		return;
298 
299 	/* Something is about to go wrong... Remember the soft error
300 	 * in case this connection is not able to recover.
301 	 */
302 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
303 		sk->sk_err_soft = EMSGSIZE;
304 
305 	mtu = dst_mtu(dst);
306 
307 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
308 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
309 		tcp_sync_mss(sk, mtu);
310 
311 		/* Resend the TCP packet because it's
312 		 * clear that the old packet has been
313 		 * dropped. This is the new "fast" path mtu
314 		 * discovery.
315 		 */
316 		tcp_simple_retransmit(sk);
317 	} /* else let the usual retransmit timer handle it */
318 }
319 
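/* An ICMP redirect was received for this connection: let the cached
 * route's redirect handler update the next hop, if a route is cached.
 */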
320 static void do_redirect(struct sk_buff *skb, struct sock *sk)
321 {
322 	struct dst_entry *dst = __sk_dst_check(sk, 0);
323 
324 	if (dst)
325 		dst->ops->redirect(dst, sk, skb);
326 }
327 
328 /*
329  * This routine is called by the ICMP module when it gets some
330  * sort of error condition.  If err < 0 then the socket should
331  * be closed and the error returned to the user.  If err > 0
332  * it's just the icmp type << 8 | icmp code.  After adjustment
333  * header points to the first 8 bytes of the tcp header.  We need
334  * to find the appropriate port.
335  *
336  * The locking strategy used here is very "optimistic". When
337  * someone else accesses the socket the ICMP is just dropped
338  * and for some paths there is no check at all.
339  * A more general error queue, holding errors for later handling,
340  * would probably be better.
341  *
342  */
343 
344 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
345 {
346 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
347 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
348 	struct inet_connection_sock *icsk;
349 	struct tcp_sock *tp;
350 	struct inet_sock *inet;
351 	const int type = icmp_hdr(icmp_skb)->type;
352 	const int code = icmp_hdr(icmp_skb)->code;
353 	struct sock *sk;
354 	struct sk_buff *skb;
355 	__u32 seq;
356 	__u32 remaining;
357 	int err;
358 	struct net *net = dev_net(icmp_skb->dev);
359 
360 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
361 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
362 		return;
363 	}
364 
365 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
366 			iph->saddr, th->source, inet_iif(icmp_skb));
367 	if (!sk) {
368 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
369 		return;
370 	}
371 	if (sk->sk_state == TCP_TIME_WAIT) {
372 		inet_twsk_put(inet_twsk(sk));
373 		return;
374 	}
375 
376 	bh_lock_sock(sk);
377 	/* If too many ICMPs get dropped on busy
378 	 * servers this needs to be solved differently.
379 	 * We do take care of the PMTU discovery (RFC1191) special case:
380 	 * we can receive locally generated ICMP messages while the socket is held.
381 	 */
382 	if (sock_owned_by_user(sk) &&
383 	    type != ICMP_DEST_UNREACH &&
384 	    code != ICMP_FRAG_NEEDED)
385 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
386 
387 	if (sk->sk_state == TCP_CLOSE)
388 		goto out;
389 
390 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
391 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
392 		goto out;
393 	}
394 
395 	icsk = inet_csk(sk);
396 	tp = tcp_sk(sk);
397 	seq = ntohl(th->seq);
398 	if (sk->sk_state != TCP_LISTEN &&
399 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
400 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
401 		goto out;
402 	}
403 
404 	switch (type) {
405 	case ICMP_REDIRECT:
406 		do_redirect(icmp_skb, sk);
407 		goto out;
408 	case ICMP_SOURCE_QUENCH:
409 		/* Just silently ignore these. */
410 		goto out;
411 	case ICMP_PARAMETERPROB:
412 		err = EPROTO;
413 		break;
414 	case ICMP_DEST_UNREACH:
415 		if (code > NR_ICMP_UNREACH)
416 			goto out;
417 
418 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
419 			tp->mtu_info = info;
420 			if (!sock_owned_by_user(sk)) {
421 				tcp_v4_mtu_reduced(sk);
422 			} else {
423 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
424 					sock_hold(sk);
425 			}
426 			goto out;
427 		}
428 
429 		err = icmp_err_convert[code].errno;
430 		/* check if icmp_skb allows revert of backoff
431 		 * (see draft-zimmermann-tcp-lcd) */
432 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
433 			break;
434 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
435 		    !icsk->icsk_backoff)
436 			break;
437 
438 		if (sock_owned_by_user(sk))
439 			break;
440 
441 		icsk->icsk_backoff--;
442 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
443 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
444 		tcp_bound_rto(sk);
445 
446 		skb = tcp_write_queue_head(sk);
447 		BUG_ON(!skb);
448 
449 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
450 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
451 
452 		if (remaining) {
453 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
454 						  remaining, TCP_RTO_MAX);
455 		} else {
456 			/* RTO revert clocked out retransmission.
457 			 * Will retransmit now */
458 			tcp_retransmit_timer(sk);
459 		}
460 
461 		break;
462 	case ICMP_TIME_EXCEEDED:
463 		err = EHOSTUNREACH;
464 		break;
465 	default:
466 		goto out;
467 	}
468 
469 	switch (sk->sk_state) {
470 		struct request_sock *req, **prev;
471 	case TCP_LISTEN:
472 		if (sock_owned_by_user(sk))
473 			goto out;
474 
475 		req = inet_csk_search_req(sk, &prev, th->dest,
476 					  iph->daddr, iph->saddr);
477 		if (!req)
478 			goto out;
479 
480 		/* ICMPs are not backlogged, hence we cannot get
481 		   an established socket here.
482 		 */
483 		WARN_ON(req->sk);
484 
485 		if (seq != tcp_rsk(req)->snt_isn) {
486 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
487 			goto out;
488 		}
489 
490 		/*
491 		 * Still in SYN_RECV, just remove it silently.
492 		 * There is no good way to pass the error to the newly
493 		 * created socket, and POSIX does not want network
494 		 * errors returned from accept().
495 		 */
496 		inet_csk_reqsk_queue_drop(sk, req, prev);
497 		goto out;
498 
499 	case TCP_SYN_SENT:
500 	case TCP_SYN_RECV:  /* Normally cannot happen;
501 			       it can, e.g., if SYNs crossed.
502 			     */
503 		if (!sock_owned_by_user(sk)) {
504 			sk->sk_err = err;
505 
506 			sk->sk_error_report(sk);
507 
508 			tcp_done(sk);
509 		} else {
510 			sk->sk_err_soft = err;
511 		}
512 		goto out;
513 	}
514 
515 	/* If we've already connected we will keep trying
516 	 * until we time out, or the user gives up.
517 	 *
518 	 * rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH to be
519 	 * considered hard errors (well, FRAG_FAILED too, but it is
520 	 * obsoleted by pmtu discovery).
521 	 *
522 	 * Note that in the modern internet, where routing is unreliable
523 	 * and broken firewalls sit in every dark corner sending random
524 	 * errors ordered by their masters, even these two messages have
525 	 * lost their original sense (even Linux sends invalid PORT_UNREACHs).
526 	 *
527 	 * Now we are in compliance with RFCs.
528 	 *							--ANK (980905)
529 	 */
530 
531 	inet = inet_sk(sk);
532 	if (!sock_owned_by_user(sk) && inet->recverr) {
533 		sk->sk_err = err;
534 		sk->sk_error_report(sk);
535 	} else	{ /* Only an error on timeout */
536 		sk->sk_err_soft = err;
537 	}
538 
539 out:
540 	bh_unlock_sock(sk);
541 	sock_put(sk);
542 }
543 
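/* Fill in the TCP checksum. With CHECKSUM_PARTIAL only the pseudo-header
 * sum is stored and the offsets recorded so that hardware (or GSO) can
 * finish the job; otherwise compute the full checksum in software.
 */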
544 static void __tcp_v4_send_check(struct sk_buff *skb,
545 				__be32 saddr, __be32 daddr)
546 {
547 	struct tcphdr *th = tcp_hdr(skb);
548 
549 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
550 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
551 		skb->csum_start = skb_transport_header(skb) - skb->head;
552 		skb->csum_offset = offsetof(struct tcphdr, check);
553 	} else {
554 		th->check = tcp_v4_check(skb->len, saddr, daddr,
555 					 csum_partial(th,
556 						      th->doff << 2,
557 						      skb->csum));
558 	}
559 }
560 
561 /* This routine computes an IPv4 TCP checksum. */
562 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
563 {
564 	const struct inet_sock *inet = inet_sk(sk);
565 
566 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
567 }
568 EXPORT_SYMBOL(tcp_v4_send_check);
569 
570 int tcp_v4_gso_send_check(struct sk_buff *skb)
571 {
572 	const struct iphdr *iph;
573 	struct tcphdr *th;
574 
575 	if (!pskb_may_pull(skb, sizeof(*th)))
576 		return -EINVAL;
577 
578 	iph = ip_hdr(skb);
579 	th = tcp_hdr(skb);
580 
581 	th->check = 0;
582 	skb->ip_summed = CHECKSUM_PARTIAL;
583 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
584 	return 0;
585 }
586 
587 /*
588  *	This routine will send an RST to the other tcp.
589  *
590  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
591  *		      for the reset?
592  *	Answer: a packet that caused an RST is not for a socket
593  *		existing in our system; if it is matched to a socket,
594  *		it is just a duplicate segment or a bug in the other
595  *		side's TCP. So we build the reply based only on the
596  *		parameters that arrived with the segment.
597  *	Exception: precedence violation. We do not implement it in any case.
598  */
599 
600 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
601 {
602 	const struct tcphdr *th = tcp_hdr(skb);
603 	struct {
604 		struct tcphdr th;
605 #ifdef CONFIG_TCP_MD5SIG
606 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
607 #endif
608 	} rep;
609 	struct ip_reply_arg arg;
610 #ifdef CONFIG_TCP_MD5SIG
611 	struct tcp_md5sig_key *key;
612 	const __u8 *hash_location = NULL;
613 	unsigned char newhash[16];
614 	int genhash;
615 	struct sock *sk1 = NULL;
616 #endif
617 	struct net *net;
618 
619 	/* Never send a reset in response to a reset. */
620 	if (th->rst)
621 		return;
622 
623 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
624 		return;
625 
626 	/* Swap the send and the receive. */
627 	memset(&rep, 0, sizeof(rep));
628 	rep.th.dest   = th->source;
629 	rep.th.source = th->dest;
630 	rep.th.doff   = sizeof(struct tcphdr) / 4;
631 	rep.th.rst    = 1;
632 
633 	if (th->ack) {
634 		rep.th.seq = th->ack_seq;
635 	} else {
636 		rep.th.ack = 1;
637 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
638 				       skb->len - (th->doff << 2));
639 	}
640 
641 	memset(&arg, 0, sizeof(arg));
642 	arg.iov[0].iov_base = (unsigned char *)&rep;
643 	arg.iov[0].iov_len  = sizeof(rep.th);
644 
645 #ifdef CONFIG_TCP_MD5SIG
646 	hash_location = tcp_parse_md5sig_option(th);
647 	if (!sk && hash_location) {
648 		/*
649 		 * The active side is gone. Try to find the listening socket
650 		 * via the source port, and then find the md5 key through that
651 		 * listening socket. We do not loosen security here:
652 		 * the incoming packet is checked against the md5 hash of the
653 		 * key we find, and no RST is generated if the hash doesn't match.
654 		 */
655 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
656 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
657 					     ntohs(th->source), inet_iif(skb));
658 		/* don't send an RST if we can't find a key */
659 		if (!sk1)
660 			return;
661 		rcu_read_lock();
662 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
663 					&ip_hdr(skb)->saddr, AF_INET);
664 		if (!key)
665 			goto release_sk1;
666 
667 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
668 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
669 			goto release_sk1;
670 	} else {
671 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
672 					     &ip_hdr(skb)->saddr,
673 					     AF_INET) : NULL;
674 	}
675 
676 	if (key) {
677 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
678 				   (TCPOPT_NOP << 16) |
679 				   (TCPOPT_MD5SIG << 8) |
680 				   TCPOLEN_MD5SIG);
681 		/* Update length and the length the header thinks exists */
682 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
683 		rep.th.doff = arg.iov[0].iov_len / 4;
684 
685 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
686 				     key, ip_hdr(skb)->saddr,
687 				     ip_hdr(skb)->daddr, &rep.th);
688 	}
689 #endif
690 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
691 				      ip_hdr(skb)->saddr, /* XXX */
692 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
693 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
694 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
695 	/* When the socket is gone, all binding information is lost and
696 	 * routing might fail. Use the incoming interface (iif) as the
697 	 * outgoing interface (oif) to make sure we can deliver the reply.
698 	 */
699 	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
700 
701 	net = dev_net(skb_dst(skb)->dev);
702 	arg.tos = ip_hdr(skb)->tos;
703 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
704 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
705 
706 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
707 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
708 
709 #ifdef CONFIG_TCP_MD5SIG
710 release_sk1:
711 	if (sk1) {
712 		rcu_read_unlock();
713 		sock_put(sk1);
714 	}
715 #endif
716 }
717 
718 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
719    outside socket context, is certainly ugly. What can I do?
720  */
721 
722 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
723 			    u32 win, u32 ts, int oif,
724 			    struct tcp_md5sig_key *key,
725 			    int reply_flags, u8 tos)
726 {
727 	const struct tcphdr *th = tcp_hdr(skb);
728 	struct {
729 		struct tcphdr th;
730 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
731 #ifdef CONFIG_TCP_MD5SIG
732 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
733 #endif
734 			];
735 	} rep;
736 	struct ip_reply_arg arg;
737 	struct net *net = dev_net(skb_dst(skb)->dev);
738 
739 	memset(&rep.th, 0, sizeof(struct tcphdr));
740 	memset(&arg, 0, sizeof(arg));
741 
742 	arg.iov[0].iov_base = (unsigned char *)&rep;
743 	arg.iov[0].iov_len  = sizeof(rep.th);
744 	if (ts) {
745 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
746 				   (TCPOPT_TIMESTAMP << 8) |
747 				   TCPOLEN_TIMESTAMP);
748 		rep.opt[1] = htonl(tcp_time_stamp);
749 		rep.opt[2] = htonl(ts);
750 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
751 	}
752 
753 	/* Swap the send and the receive. */
754 	rep.th.dest    = th->source;
755 	rep.th.source  = th->dest;
756 	rep.th.doff    = arg.iov[0].iov_len / 4;
757 	rep.th.seq     = htonl(seq);
758 	rep.th.ack_seq = htonl(ack);
759 	rep.th.ack     = 1;
760 	rep.th.window  = htons(win);
761 
762 #ifdef CONFIG_TCP_MD5SIG
763 	if (key) {
764 		int offset = (ts) ? 3 : 0;
765 
766 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
767 					  (TCPOPT_NOP << 16) |
768 					  (TCPOPT_MD5SIG << 8) |
769 					  TCPOLEN_MD5SIG);
770 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
771 		rep.th.doff = arg.iov[0].iov_len/4;
772 
773 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
774 				    key, ip_hdr(skb)->saddr,
775 				    ip_hdr(skb)->daddr, &rep.th);
776 	}
777 #endif
778 	arg.flags = reply_flags;
779 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
780 				      ip_hdr(skb)->saddr, /* XXX */
781 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
782 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
783 	if (oif)
784 		arg.bound_dev_if = oif;
785 	arg.tos = tos;
786 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
787 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
788 
789 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
790 }
791 
792 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
793 {
794 	struct inet_timewait_sock *tw = inet_twsk(sk);
795 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
796 
797 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
798 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
799 			tcptw->tw_ts_recent,
800 			tw->tw_bound_dev_if,
801 			tcp_twsk_md5_key(tcptw),
802 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
803 			tw->tw_tos
804 			);
805 
806 	inet_twsk_put(tw);
807 }
808 
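/* ACK a segment on behalf of a connection that is still in SYN-RECV:
 * the sequence numbers and options come from the request_sock rather
 * than from a full socket.
 */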
809 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
810 				  struct request_sock *req)
811 {
812 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
813 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
814 			req->ts_recent,
815 			0,
816 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
817 					  AF_INET),
818 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
819 			ip_hdr(skb)->tos);
820 }
821 
822 /*
823  *	Send a SYN-ACK after having received a SYN.
824  *	This still operates on a request_sock only, not on a big
825  *	socket.
826  */
827 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
828 			      struct request_sock *req,
829 			      struct request_values *rvp,
830 			      u16 queue_mapping,
831 			      bool nocache)
832 {
833 	const struct inet_request_sock *ireq = inet_rsk(req);
834 	struct flowi4 fl4;
835 	int err = -1;
836 	struct sk_buff * skb;
837 
838 	/* First, grab a route. */
839 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
840 		return -1;
841 
842 	skb = tcp_make_synack(sk, dst, req, rvp);
843 
844 	if (skb) {
845 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
846 
847 		skb_set_queue_mapping(skb, queue_mapping);
848 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
849 					    ireq->rmt_addr,
850 					    ireq->opt);
851 		err = net_xmit_eval(err);
852 	}
853 
854 	return err;
855 }
856 
857 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
858 			      struct request_values *rvp)
859 {
860 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
861 	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
862 }
863 
864 /*
865  *	IPv4 request_sock destructor.
866  */
867 static void tcp_v4_reqsk_destructor(struct request_sock *req)
868 {
869 	kfree(inet_rsk(req)->opt);
870 }
871 
872 /*
873  * Return true if a syncookie should be sent
874  */
875 bool tcp_syn_flood_action(struct sock *sk,
876 			 const struct sk_buff *skb,
877 			 const char *proto)
878 {
879 	const char *msg = "Dropping request";
880 	bool want_cookie = false;
881 	struct listen_sock *lopt;
882 
883 
884 
885 #ifdef CONFIG_SYN_COOKIES
886 	if (sysctl_tcp_syncookies) {
887 		msg = "Sending cookies";
888 		want_cookie = true;
889 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
890 	} else
891 #endif
892 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
893 
894 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
895 	if (!lopt->synflood_warned) {
896 		lopt->synflood_warned = 1;
897 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
898 			proto, ntohs(tcp_hdr(skb)->dest), msg);
899 	}
900 	return want_cookie;
901 }
902 EXPORT_SYMBOL(tcp_syn_flood_action);
903 
904 /*
905  * Save and compile IPv4 options into the request_sock if needed.
906  */
907 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
908 						  struct sk_buff *skb)
909 {
910 	const struct ip_options *opt = &(IPCB(skb)->opt);
911 	struct ip_options_rcu *dopt = NULL;
912 
913 	if (opt && opt->optlen) {
914 		int opt_size = sizeof(*dopt) + opt->optlen;
915 
916 		dopt = kmalloc(opt_size, GFP_ATOMIC);
917 		if (dopt) {
918 			if (ip_options_echo(&dopt->opt, skb)) {
919 				kfree(dopt);
920 				dopt = NULL;
921 			}
922 		}
923 	}
924 	return dopt;
925 }
926 
927 #ifdef CONFIG_TCP_MD5SIG
928 /*
929  * RFC2385 MD5 checksumming requires a mapping of
930  * IP address->MD5 Key.
931  * We need to maintain these in the sk structure.
932  */
933 
934 /* Find the Key structure for an address.  */
935 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
936 					 const union tcp_md5_addr *addr,
937 					 int family)
938 {
939 	struct tcp_sock *tp = tcp_sk(sk);
940 	struct tcp_md5sig_key *key;
941 	struct hlist_node *pos;
942 	unsigned int size = sizeof(struct in_addr);
943 	struct tcp_md5sig_info *md5sig;
944 
945 	/* caller either holds rcu_read_lock() or socket lock */
946 	md5sig = rcu_dereference_check(tp->md5sig_info,
947 				       sock_owned_by_user(sk) ||
948 				       lockdep_is_held(&sk->sk_lock.slock));
949 	if (!md5sig)
950 		return NULL;
951 #if IS_ENABLED(CONFIG_IPV6)
952 	if (family == AF_INET6)
953 		size = sizeof(struct in6_addr);
954 #endif
955 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
956 		if (key->family != family)
957 			continue;
958 		if (!memcmp(&key->addr, addr, size))
959 			return key;
960 	}
961 	return NULL;
962 }
963 EXPORT_SYMBOL(tcp_md5_do_lookup);
964 
965 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
966 					 struct sock *addr_sk)
967 {
968 	union tcp_md5_addr *addr;
969 
970 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
971 	return tcp_md5_do_lookup(sk, addr, AF_INET);
972 }
973 EXPORT_SYMBOL(tcp_v4_md5_lookup);
974 
975 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
976 						      struct request_sock *req)
977 {
978 	union tcp_md5_addr *addr;
979 
980 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
981 	return tcp_md5_do_lookup(sk, addr, AF_INET);
982 }
983 
984 /* This can be called on a newly created socket, from other files */
985 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
986 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
987 {
988 	/* Add Key to the list */
989 	struct tcp_md5sig_key *key;
990 	struct tcp_sock *tp = tcp_sk(sk);
991 	struct tcp_md5sig_info *md5sig;
992 
993 	key = tcp_md5_do_lookup(sk, addr, family);
994 	if (key) {
995 		/* Pre-existing entry - just update that one. */
996 		memcpy(key->key, newkey, newkeylen);
997 		key->keylen = newkeylen;
998 		return 0;
999 	}
1000 
1001 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1002 					   sock_owned_by_user(sk));
1003 	if (!md5sig) {
1004 		md5sig = kmalloc(sizeof(*md5sig), gfp);
1005 		if (!md5sig)
1006 			return -ENOMEM;
1007 
1008 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1009 		INIT_HLIST_HEAD(&md5sig->head);
1010 		rcu_assign_pointer(tp->md5sig_info, md5sig);
1011 	}
1012 
1013 	key = sock_kmalloc(sk, sizeof(*key), gfp);
1014 	if (!key)
1015 		return -ENOMEM;
1016 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1017 		sock_kfree_s(sk, key, sizeof(*key));
1018 		return -ENOMEM;
1019 	}
1020 
1021 	memcpy(key->key, newkey, newkeylen);
1022 	key->keylen = newkeylen;
1023 	key->family = family;
1024 	memcpy(&key->addr, addr,
1025 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1026 				      sizeof(struct in_addr));
1027 	hlist_add_head_rcu(&key->node, &md5sig->head);
1028 	return 0;
1029 }
1030 EXPORT_SYMBOL(tcp_md5_do_add);
1031 
1032 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1033 {
1034 	struct tcp_sock *tp = tcp_sk(sk);
1035 	struct tcp_md5sig_key *key;
1036 	struct tcp_md5sig_info *md5sig;
1037 
1038 	key = tcp_md5_do_lookup(sk, addr, family);
1039 	if (!key)
1040 		return -ENOENT;
1041 	hlist_del_rcu(&key->node);
1042 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1043 	kfree_rcu(key, rcu);
1044 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1045 					   sock_owned_by_user(sk));
1046 	if (hlist_empty(&md5sig->head))
1047 		tcp_free_md5sig_pool();
1048 	return 0;
1049 }
1050 EXPORT_SYMBOL(tcp_md5_do_del);
1051 
1052 void tcp_clear_md5_list(struct sock *sk)
1053 {
1054 	struct tcp_sock *tp = tcp_sk(sk);
1055 	struct tcp_md5sig_key *key;
1056 	struct hlist_node *pos, *n;
1057 	struct tcp_md5sig_info *md5sig;
1058 
1059 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1060 
1061 	if (!hlist_empty(&md5sig->head))
1062 		tcp_free_md5sig_pool();
1063 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1064 		hlist_del_rcu(&key->node);
1065 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1066 		kfree_rcu(key, rcu);
1067 	}
1068 }
1069 
1070 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1071 				 int optlen)
1072 {
1073 	struct tcp_md5sig cmd;
1074 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1075 
1076 	if (optlen < sizeof(cmd))
1077 		return -EINVAL;
1078 
1079 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1080 		return -EFAULT;
1081 
1082 	if (sin->sin_family != AF_INET)
1083 		return -EINVAL;
1084 
1085 	if (!cmd.tcpm_keylen)
1086 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1087 				      AF_INET);
1088 
1089 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1090 		return -EINVAL;
1091 
1092 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1093 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1094 			      GFP_KERNEL);
1095 }
1096 
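/* Feed the IPv4 pseudo-header into the MD5 digest; RFC 2385 requires it
 * to be covered by the signature along with the TCP header, data and key.
 */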
1097 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1098 					__be32 daddr, __be32 saddr, int nbytes)
1099 {
1100 	struct tcp4_pseudohdr *bp;
1101 	struct scatterlist sg;
1102 
1103 	bp = &hp->md5_blk.ip4;
1104 
1105 	/*
1106 	 * 1. the TCP pseudo-header (in the order: source IP address,
1107 	 * destination IP address, zero-padded protocol number, and
1108 	 * segment length)
1109 	 */
1110 	bp->saddr = saddr;
1111 	bp->daddr = daddr;
1112 	bp->pad = 0;
1113 	bp->protocol = IPPROTO_TCP;
1114 	bp->len = cpu_to_be16(nbytes);
1115 
1116 	sg_init_one(&sg, bp, sizeof(*bp));
1117 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1118 }
1119 
1120 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1121 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1122 {
1123 	struct tcp_md5sig_pool *hp;
1124 	struct hash_desc *desc;
1125 
1126 	hp = tcp_get_md5sig_pool();
1127 	if (!hp)
1128 		goto clear_hash_noput;
1129 	desc = &hp->md5_desc;
1130 
1131 	if (crypto_hash_init(desc))
1132 		goto clear_hash;
1133 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1134 		goto clear_hash;
1135 	if (tcp_md5_hash_header(hp, th))
1136 		goto clear_hash;
1137 	if (tcp_md5_hash_key(hp, key))
1138 		goto clear_hash;
1139 	if (crypto_hash_final(desc, md5_hash))
1140 		goto clear_hash;
1141 
1142 	tcp_put_md5sig_pool();
1143 	return 0;
1144 
1145 clear_hash:
1146 	tcp_put_md5sig_pool();
1147 clear_hash_noput:
1148 	memset(md5_hash, 0, 16);
1149 	return 1;
1150 }
1151 
1152 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1153 			const struct sock *sk, const struct request_sock *req,
1154 			const struct sk_buff *skb)
1155 {
1156 	struct tcp_md5sig_pool *hp;
1157 	struct hash_desc *desc;
1158 	const struct tcphdr *th = tcp_hdr(skb);
1159 	__be32 saddr, daddr;
1160 
1161 	if (sk) {
1162 		saddr = inet_sk(sk)->inet_saddr;
1163 		daddr = inet_sk(sk)->inet_daddr;
1164 	} else if (req) {
1165 		saddr = inet_rsk(req)->loc_addr;
1166 		daddr = inet_rsk(req)->rmt_addr;
1167 	} else {
1168 		const struct iphdr *iph = ip_hdr(skb);
1169 		saddr = iph->saddr;
1170 		daddr = iph->daddr;
1171 	}
1172 
1173 	hp = tcp_get_md5sig_pool();
1174 	if (!hp)
1175 		goto clear_hash_noput;
1176 	desc = &hp->md5_desc;
1177 
1178 	if (crypto_hash_init(desc))
1179 		goto clear_hash;
1180 
1181 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1182 		goto clear_hash;
1183 	if (tcp_md5_hash_header(hp, th))
1184 		goto clear_hash;
1185 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1186 		goto clear_hash;
1187 	if (tcp_md5_hash_key(hp, key))
1188 		goto clear_hash;
1189 	if (crypto_hash_final(desc, md5_hash))
1190 		goto clear_hash;
1191 
1192 	tcp_put_md5sig_pool();
1193 	return 0;
1194 
1195 clear_hash:
1196 	tcp_put_md5sig_pool();
1197 clear_hash_noput:
1198 	memset(md5_hash, 0, 16);
1199 	return 1;
1200 }
1201 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1202 
1203 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1204 {
1205 	/*
1206 	 * This gets called for each TCP segment that arrives
1207 	 * so we want to be efficient.
1208 	 * We have 3 drop cases:
1209 	 * o No MD5 hash and one expected.
1210 	 * o MD5 hash and we're not expecting one.
1211 	 * o MD5 hash and it's wrong.
1212 	 */
1213 	const __u8 *hash_location = NULL;
1214 	struct tcp_md5sig_key *hash_expected;
1215 	const struct iphdr *iph = ip_hdr(skb);
1216 	const struct tcphdr *th = tcp_hdr(skb);
1217 	int genhash;
1218 	unsigned char newhash[16];
1219 
1220 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1221 					  AF_INET);
1222 	hash_location = tcp_parse_md5sig_option(th);
1223 
1224 	/* We've parsed the options - do we have a hash? */
1225 	if (!hash_expected && !hash_location)
1226 		return false;
1227 
1228 	if (hash_expected && !hash_location) {
1229 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1230 		return true;
1231 	}
1232 
1233 	if (!hash_expected && hash_location) {
1234 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1235 		return true;
1236 	}
1237 
1238 	/* Okay, so we have both hash_expected and hash_location -
1239 	 * we need to calculate the hash and compare.
1240 	 */
1241 	genhash = tcp_v4_md5_hash_skb(newhash,
1242 				      hash_expected,
1243 				      NULL, NULL, skb);
1244 
1245 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1246 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1247 				     &iph->saddr, ntohs(th->source),
1248 				     &iph->daddr, ntohs(th->dest),
1249 				     genhash ? " tcp_v4_calc_md5_hash failed"
1250 				     : "");
1251 		return true;
1252 	}
1253 	return false;
1254 }
1255 
1256 #endif
1257 
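/* Operations used while a connection is still represented by a
 * request_sock (SYN-RECV), before a full child socket has been created.
 */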
1258 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1259 	.family		=	PF_INET,
1260 	.obj_size	=	sizeof(struct tcp_request_sock),
1261 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1262 	.send_ack	=	tcp_v4_reqsk_send_ack,
1263 	.destructor	=	tcp_v4_reqsk_destructor,
1264 	.send_reset	=	tcp_v4_send_reset,
1265 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1266 };
1267 
1268 #ifdef CONFIG_TCP_MD5SIG
1269 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1270 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1271 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1272 };
1273 #endif
1274 
1275 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1276 {
1277 	struct tcp_extend_values tmp_ext;
1278 	struct tcp_options_received tmp_opt;
1279 	const u8 *hash_location;
1280 	struct request_sock *req;
1281 	struct inet_request_sock *ireq;
1282 	struct tcp_sock *tp = tcp_sk(sk);
1283 	struct dst_entry *dst = NULL;
1284 	__be32 saddr = ip_hdr(skb)->saddr;
1285 	__be32 daddr = ip_hdr(skb)->daddr;
1286 	__u32 isn = TCP_SKB_CB(skb)->when;
1287 	bool want_cookie = false;
1288 
1289 	/* Never answer SYNs sent to broadcast or multicast */
1290 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1291 		goto drop;
1292 
1293 	/* TW buckets are converted to open requests without
1294 	 * limitation; they conserve resources and the peer is
1295 	 * evidently a real one.
1296 	 */
1297 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1298 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1299 		if (!want_cookie)
1300 			goto drop;
1301 	}
1302 
1303 	/* The accept backlog is full. If we have already queued enough
1304 	 * warm entries in the syn queue, drop the request. That is better
1305 	 * than clogging the syn queue with openreqs whose timeouts
1306 	 * increase exponentially.
1307 	 */
1308 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1309 		goto drop;
1310 
1311 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1312 	if (!req)
1313 		goto drop;
1314 
1315 #ifdef CONFIG_TCP_MD5SIG
1316 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1317 #endif
1318 
1319 	tcp_clear_options(&tmp_opt);
1320 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1321 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1322 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
1323 
1324 	if (tmp_opt.cookie_plus > 0 &&
1325 	    tmp_opt.saw_tstamp &&
1326 	    !tp->rx_opt.cookie_out_never &&
1327 	    (sysctl_tcp_cookie_size > 0 ||
1328 	     (tp->cookie_values != NULL &&
1329 	      tp->cookie_values->cookie_desired > 0))) {
1330 		u8 *c;
1331 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1332 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1333 
1334 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1335 			goto drop_and_release;
1336 
1337 		/* Secret recipe starts with IP addresses */
1338 		*mess++ ^= (__force u32)daddr;
1339 		*mess++ ^= (__force u32)saddr;
1340 
1341 		/* plus variable length Initiator Cookie */
1342 		c = (u8 *)mess;
1343 		while (l-- > 0)
1344 			*c++ ^= *hash_location++;
1345 
1346 		want_cookie = false;	/* not our kind of cookie */
1347 		tmp_ext.cookie_out_never = 0; /* false */
1348 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1349 	} else if (!tp->rx_opt.cookie_in_always) {
1350 		/* redundant indications, but ensure initialization. */
1351 		tmp_ext.cookie_out_never = 1; /* true */
1352 		tmp_ext.cookie_plus = 0;
1353 	} else {
1354 		goto drop_and_release;
1355 	}
1356 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1357 
1358 	if (want_cookie && !tmp_opt.saw_tstamp)
1359 		tcp_clear_options(&tmp_opt);
1360 
1361 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1362 	tcp_openreq_init(req, &tmp_opt, skb);
1363 
1364 	ireq = inet_rsk(req);
1365 	ireq->loc_addr = daddr;
1366 	ireq->rmt_addr = saddr;
1367 	ireq->no_srccheck = inet_sk(sk)->transparent;
1368 	ireq->opt = tcp_v4_save_options(sk, skb);
1369 
1370 	if (security_inet_conn_request(sk, skb, req))
1371 		goto drop_and_free;
1372 
1373 	if (!want_cookie || tmp_opt.tstamp_ok)
1374 		TCP_ECN_create_request(req, skb);
1375 
1376 	if (want_cookie) {
1377 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1378 		req->cookie_ts = tmp_opt.tstamp_ok;
1379 	} else if (!isn) {
1380 		struct flowi4 fl4;
1381 
1382 		/* VJ's idea. We save the last timestamp seen
1383 		 * from the destination in the peer table when entering
1384 		 * TIME-WAIT state, and check against it before
1385 		 * accepting a new connection request.
1386 		 *
1387 		 * If "isn" is not zero, this request hit a live
1388 		 * timewait bucket, so all the necessary checks
1389 		 * were made in the function processing the timewait state.
1390 		 */
1391 		if (tmp_opt.saw_tstamp &&
1392 		    tcp_death_row.sysctl_tw_recycle &&
1393 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1394 		    fl4.daddr == saddr) {
1395 			if (!tcp_peer_is_proven(req, dst, true)) {
1396 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1397 				goto drop_and_release;
1398 			}
1399 		}
1400 		/* Kill the following clause, if you dislike this way. */
1401 		else if (!sysctl_tcp_syncookies &&
1402 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1403 			  (sysctl_max_syn_backlog >> 2)) &&
1404 			 !tcp_peer_is_proven(req, dst, false)) {
1405 			/* Without syncookies, the last quarter of the
1406 			 * backlog is reserved for destinations
1407 			 * proven to be alive.
1408 			 * This means that we keep communicating with
1409 			 * destinations already remembered at the
1410 			 * moment the synflood started.
1411 			 */
1412 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1413 				       &saddr, ntohs(tcp_hdr(skb)->source));
1414 			goto drop_and_release;
1415 		}
1416 
1417 		isn = tcp_v4_init_sequence(skb);
1418 	}
1419 	tcp_rsk(req)->snt_isn = isn;
1420 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
1421 
1422 	if (tcp_v4_send_synack(sk, dst, req,
1423 			       (struct request_values *)&tmp_ext,
1424 			       skb_get_queue_mapping(skb),
1425 			       want_cookie) ||
1426 	    want_cookie)
1427 		goto drop_and_free;
1428 
1429 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1430 	return 0;
1431 
1432 drop_and_release:
1433 	dst_release(dst);
1434 drop_and_free:
1435 	reqsk_free(req);
1436 drop:
1437 	return 0;
1438 }
1439 EXPORT_SYMBOL(tcp_v4_conn_request);
1440 
1441 
1442 /*
1443  * The three-way handshake has completed - we got a valid ACK -
1444  * now create the new socket.
1445  */
1446 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1447 				  struct request_sock *req,
1448 				  struct dst_entry *dst)
1449 {
1450 	struct inet_request_sock *ireq;
1451 	struct inet_sock *newinet;
1452 	struct tcp_sock *newtp;
1453 	struct sock *newsk;
1454 #ifdef CONFIG_TCP_MD5SIG
1455 	struct tcp_md5sig_key *key;
1456 #endif
1457 	struct ip_options_rcu *inet_opt;
1458 
1459 	if (sk_acceptq_is_full(sk))
1460 		goto exit_overflow;
1461 
1462 	newsk = tcp_create_openreq_child(sk, req, skb);
1463 	if (!newsk)
1464 		goto exit_nonewsk;
1465 
1466 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1467 	inet_sk_rx_dst_set(newsk, skb);
1468 
1469 	newtp		      = tcp_sk(newsk);
1470 	newinet		      = inet_sk(newsk);
1471 	ireq		      = inet_rsk(req);
1472 	newinet->inet_daddr   = ireq->rmt_addr;
1473 	newinet->inet_rcv_saddr = ireq->loc_addr;
1474 	newinet->inet_saddr	      = ireq->loc_addr;
1475 	inet_opt	      = ireq->opt;
1476 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1477 	ireq->opt	      = NULL;
1478 	newinet->mc_index     = inet_iif(skb);
1479 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1480 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1481 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1482 	if (inet_opt)
1483 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1484 	newinet->inet_id = newtp->write_seq ^ jiffies;
1485 
1486 	if (!dst) {
1487 		dst = inet_csk_route_child_sock(sk, newsk, req);
1488 		if (!dst)
1489 			goto put_and_exit;
1490 	} else {
1491 		/* syncookie case : see end of cookie_v4_check() */
1492 	}
1493 	sk_setup_caps(newsk, dst);
1494 
1495 	tcp_mtup_init(newsk);
1496 	tcp_sync_mss(newsk, dst_mtu(dst));
1497 	newtp->advmss = dst_metric_advmss(dst);
1498 	if (tcp_sk(sk)->rx_opt.user_mss &&
1499 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1500 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1501 
1502 	tcp_initialize_rcv_mss(newsk);
1503 	if (tcp_rsk(req)->snt_synack)
1504 		tcp_valid_rtt_meas(newsk,
1505 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1506 	newtp->total_retrans = req->retrans;
1507 
1508 #ifdef CONFIG_TCP_MD5SIG
1509 	/* Copy over the MD5 key from the original socket */
1510 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1511 				AF_INET);
1512 	if (key != NULL) {
1513 		/*
1514 		 * We're using one, so create a matching key
1515 		 * on the newsk structure. If we fail to get
1516 		 * memory, then we end up not copying the key
1517 		 * across. Shucks.
1518 		 */
1519 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1520 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1521 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1522 	}
1523 #endif
1524 
1525 	if (__inet_inherit_port(sk, newsk) < 0)
1526 		goto put_and_exit;
1527 	__inet_hash_nolisten(newsk, NULL);
1528 
1529 	return newsk;
1530 
1531 exit_overflow:
1532 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1533 exit_nonewsk:
1534 	dst_release(dst);
1535 exit:
1536 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1537 	return NULL;
1538 put_and_exit:
1539 	tcp_clear_xmit_timers(newsk);
1540 	tcp_cleanup_congestion_control(newsk);
1541 	bh_unlock_sock(newsk);
1542 	sock_put(newsk);
1543 	goto exit;
1544 }
1545 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1546 
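/* Handle a segment that arrived on a listening socket: look for a
 * matching pending connection request or an already established child
 * socket, and fall back to syncookie validation when enabled.
 */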
1547 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1548 {
1549 	struct tcphdr *th = tcp_hdr(skb);
1550 	const struct iphdr *iph = ip_hdr(skb);
1551 	struct sock *nsk;
1552 	struct request_sock **prev;
1553 	/* Find possible connection requests. */
1554 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1555 						       iph->saddr, iph->daddr);
1556 	if (req)
1557 		return tcp_check_req(sk, skb, req, prev);
1558 
1559 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1560 			th->source, iph->daddr, th->dest, inet_iif(skb));
1561 
1562 	if (nsk) {
1563 		if (nsk->sk_state != TCP_TIME_WAIT) {
1564 			bh_lock_sock(nsk);
1565 			return nsk;
1566 		}
1567 		inet_twsk_put(inet_twsk(nsk));
1568 		return NULL;
1569 	}
1570 
1571 #ifdef CONFIG_SYN_COOKIES
1572 	if (!th->syn)
1573 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1574 #endif
1575 	return sk;
1576 }
1577 
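/* Validate or prepare the TCP checksum on receive. A device-provided
 * full checksum (CHECKSUM_COMPLETE) is verified here; otherwise the
 * pseudo-header sum is stored so verification can be completed later.
 * Short packets (<= 76 bytes) are verified immediately.
 */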
1578 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1579 {
1580 	const struct iphdr *iph = ip_hdr(skb);
1581 
1582 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1583 		if (!tcp_v4_check(skb->len, iph->saddr,
1584 				  iph->daddr, skb->csum)) {
1585 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1586 			return 0;
1587 		}
1588 	}
1589 
1590 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1591 				       skb->len, IPPROTO_TCP, 0);
1592 
1593 	if (skb->len <= 76) {
1594 		return __skb_checksum_complete(skb);
1595 	}
1596 	return 0;
1597 }
1598 
1599 
1600 /* The socket must have its spinlock held when we get
1601  * here.
1602  *
1603  * We have a potential double-lock case here, so even when
1604  * doing backlog processing we use the BH locking scheme.
1605  * This is because we cannot sleep with the original spinlock
1606  * held.
1607  */
1608 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1609 {
1610 	struct sock *rsk;
1611 #ifdef CONFIG_TCP_MD5SIG
1612 	/*
1613 	 * We really want to reject the packet as early as possible
1614 	 * if:
1615 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1616 	 *  o There is an MD5 option and we're not expecting one
1617 	 */
1618 	if (tcp_v4_inbound_md5_hash(sk, skb))
1619 		goto discard;
1620 #endif
1621 
1622 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1623 		struct dst_entry *dst = sk->sk_rx_dst;
1624 
1625 		sock_rps_save_rxhash(sk, skb);
1626 		if (dst) {
1627 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1628 			    dst->ops->check(dst, 0) == NULL) {
1629 				dst_release(dst);
1630 				sk->sk_rx_dst = NULL;
1631 			}
1632 		}
1633 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1634 			rsk = sk;
1635 			goto reset;
1636 		}
1637 		return 0;
1638 	}
1639 
1640 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1641 		goto csum_err;
1642 
1643 	if (sk->sk_state == TCP_LISTEN) {
1644 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1645 		if (!nsk)
1646 			goto discard;
1647 
1648 		if (nsk != sk) {
1649 			sock_rps_save_rxhash(nsk, skb);
1650 			if (tcp_child_process(sk, nsk, skb)) {
1651 				rsk = nsk;
1652 				goto reset;
1653 			}
1654 			return 0;
1655 		}
1656 	} else
1657 		sock_rps_save_rxhash(sk, skb);
1658 
1659 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1660 		rsk = sk;
1661 		goto reset;
1662 	}
1663 	return 0;
1664 
1665 reset:
1666 	tcp_v4_send_reset(rsk, skb);
1667 discard:
1668 	kfree_skb(skb);
1669 	/* Be careful here. If this function gets more complicated and
1670 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1671 	 * might be destroyed here. This current version compiles correctly,
1672 	 * but you have been warned.
1673 	 */
1674 	return 0;
1675 
1676 csum_err:
1677 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1678 	goto discard;
1679 }
1680 EXPORT_SYMBOL(tcp_v4_do_rcv);
1681 
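/* Early demux: look up the established socket for this packet before
 * the routing decision, so that a still-valid cached rx dst on the
 * socket can be reused and a full route lookup avoided.
 */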
1682 void tcp_v4_early_demux(struct sk_buff *skb)
1683 {
1684 	struct net *net = dev_net(skb->dev);
1685 	const struct iphdr *iph;
1686 	const struct tcphdr *th;
1687 	struct sock *sk;
1688 
1689 	if (skb->pkt_type != PACKET_HOST)
1690 		return;
1691 
1692 	if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1693 		return;
1694 
1695 	iph = ip_hdr(skb);
1696 	th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1697 
1698 	if (th->doff < sizeof(struct tcphdr) / 4)
1699 		return;
1700 
1701 	sk = __inet_lookup_established(net, &tcp_hashinfo,
1702 				       iph->saddr, th->source,
1703 				       iph->daddr, ntohs(th->dest),
1704 				       skb->skb_iif);
1705 	if (sk) {
1706 		skb->sk = sk;
1707 		skb->destructor = sock_edemux;
1708 		if (sk->sk_state != TCP_TIME_WAIT) {
1709 			struct dst_entry *dst = sk->sk_rx_dst;
1710 
1711 			if (dst)
1712 				dst = dst_check(dst, 0);
1713 			if (dst &&
1714 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1715 				skb_dst_set_noref(skb, dst);
1716 		}
1717 	}
1718 }
1719 
1720 /*
1721  *	From tcp_input.c
1722  */
1723 
1724 int tcp_v4_rcv(struct sk_buff *skb)
1725 {
1726 	const struct iphdr *iph;
1727 	const struct tcphdr *th;
1728 	struct sock *sk;
1729 	int ret;
1730 	struct net *net = dev_net(skb->dev);
1731 
1732 	if (skb->pkt_type != PACKET_HOST)
1733 		goto discard_it;
1734 
1735 	/* Count it even if it's bad */
1736 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1737 
1738 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1739 		goto discard_it;
1740 
1741 	th = tcp_hdr(skb);
1742 
1743 	if (th->doff < sizeof(struct tcphdr) / 4)
1744 		goto bad_packet;
1745 	if (!pskb_may_pull(skb, th->doff * 4))
1746 		goto discard_it;
1747 
1748 	/* An explanation is required here, I think.
1749 	 * Packet length and doff are validated by header prediction,
1750 	 * provided the case of th->doff == 0 is eliminated.
1751 	 * So, we defer the checks. */
1752 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1753 		goto bad_packet;
1754 
1755 	th = tcp_hdr(skb);
1756 	iph = ip_hdr(skb);
1757 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1758 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1759 				    skb->len - th->doff * 4);
1760 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1761 	TCP_SKB_CB(skb)->when	 = 0;
1762 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1763 	TCP_SKB_CB(skb)->sacked	 = 0;
1764 
1765 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1766 	if (!sk)
1767 		goto no_tcp_socket;
1768 
1769 process:
1770 	if (sk->sk_state == TCP_TIME_WAIT)
1771 		goto do_time_wait;
1772 
1773 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1774 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1775 		goto discard_and_relse;
1776 	}
1777 
1778 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1779 		goto discard_and_relse;
1780 	nf_reset(skb);
1781 
1782 	if (sk_filter(sk, skb))
1783 		goto discard_and_relse;
1784 
1785 	skb->dev = NULL;
1786 
1787 	bh_lock_sock_nested(sk);
1788 	ret = 0;
1789 	if (!sock_owned_by_user(sk)) {
1790 #ifdef CONFIG_NET_DMA
1791 		struct tcp_sock *tp = tcp_sk(sk);
1792 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1793 			tp->ucopy.dma_chan = net_dma_find_channel();
1794 		if (tp->ucopy.dma_chan)
1795 			ret = tcp_v4_do_rcv(sk, skb);
1796 		else
1797 #endif
1798 		{
1799 			if (!tcp_prequeue(sk, skb))
1800 				ret = tcp_v4_do_rcv(sk, skb);
1801 		}
1802 	} else if (unlikely(sk_add_backlog(sk, skb,
1803 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1804 		bh_unlock_sock(sk);
1805 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1806 		goto discard_and_relse;
1807 	}
1808 	bh_unlock_sock(sk);
1809 
1810 	sock_put(sk);
1811 
1812 	return ret;
1813 
1814 no_tcp_socket:
1815 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1816 		goto discard_it;
1817 
1818 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1819 bad_packet:
1820 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1821 	} else {
1822 		tcp_v4_send_reset(NULL, skb);
1823 	}
1824 
1825 discard_it:
1826 	/* Discard frame. */
1827 	kfree_skb(skb);
1828 	return 0;
1829 
1830 discard_and_relse:
1831 	sock_put(sk);
1832 	goto discard_it;
1833 
1834 do_time_wait:
1835 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1836 		inet_twsk_put(inet_twsk(sk));
1837 		goto discard_it;
1838 	}
1839 
1840 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1841 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1842 		inet_twsk_put(inet_twsk(sk));
1843 		goto discard_it;
1844 	}
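	/* tcp_timewait_state_process() decides how to handle a segment that
	 * hits a TIME-WAIT socket: accept it as a new connection (TCP_TW_SYN),
	 * answer with an ACK (TCP_TW_ACK) or a reset (TCP_TW_RST), or drop it
	 * silently (TCP_TW_SUCCESS).
	 */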
1845 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1846 	case TCP_TW_SYN: {
1847 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1848 							&tcp_hashinfo,
1849 							iph->daddr, th->dest,
1850 							inet_iif(skb));
1851 		if (sk2) {
1852 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1853 			inet_twsk_put(inet_twsk(sk));
1854 			sk = sk2;
1855 			goto process;
1856 		}
1857 		/* Fall through to ACK */
1858 	}
1859 	case TCP_TW_ACK:
1860 		tcp_v4_timewait_ack(sk, skb);
1861 		break;
1862 	case TCP_TW_RST:
1863 		goto no_tcp_socket;
1864 	case TCP_TW_SUCCESS:;
1865 	}
1866 	goto discard_it;
1867 }
1868 
1869 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1870 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1871 	.twsk_unique	= tcp_twsk_unique,
1872 	.twsk_destructor= tcp_twsk_destructor,
1873 };
1874 
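/* Cache the inbound route and the interface it arrived on in the socket,
 * so the receive path can reuse them instead of repeating the lookup.
 */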
1875 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1876 {
1877 	struct dst_entry *dst = skb_dst(skb);
1878 
1879 	dst_hold(dst);
1880 	sk->sk_rx_dst = dst;
1881 	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1882 }
1883 EXPORT_SYMBOL(inet_sk_rx_dst_set);
1884 
1885 const struct inet_connection_sock_af_ops ipv4_specific = {
1886 	.queue_xmit	   = ip_queue_xmit,
1887 	.send_check	   = tcp_v4_send_check,
1888 	.rebuild_header	   = inet_sk_rebuild_header,
1889 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1890 	.conn_request	   = tcp_v4_conn_request,
1891 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1892 	.net_header_len	   = sizeof(struct iphdr),
1893 	.setsockopt	   = ip_setsockopt,
1894 	.getsockopt	   = ip_getsockopt,
1895 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1896 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1897 	.bind_conflict	   = inet_csk_bind_conflict,
1898 #ifdef CONFIG_COMPAT
1899 	.compat_setsockopt = compat_ip_setsockopt,
1900 	.compat_getsockopt = compat_ip_getsockopt,
1901 #endif
1902 };
1903 EXPORT_SYMBOL(ipv4_specific);
1904 
1905 #ifdef CONFIG_TCP_MD5SIG
1906 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1907 	.md5_lookup		= tcp_v4_md5_lookup,
1908 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1909 	.md5_parse		= tcp_v4_parse_md5_keys,
1910 };
1911 #endif
1912 
1913 /* NOTE: A lot of things are set to zero explicitly by the call to
1914  *       sk_alloc(), so they need not be done here.
1915  */
1916 static int tcp_v4_init_sock(struct sock *sk)
1917 {
1918 	struct inet_connection_sock *icsk = inet_csk(sk);
1919 
1920 	tcp_init_sock(sk);
1921 
1922 	icsk->icsk_af_ops = &ipv4_specific;
1923 
1924 #ifdef CONFIG_TCP_MD5SIG
1925 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1926 #endif
1927 
1928 	return 0;
1929 }
1930 
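/* Release everything the socket may still hold when it is destroyed:
 * timers, congestion control state, the write/out-of-order/prequeue
 * queues, MD5 keys, the bound port, the cached sendmsg page and any
 * cookie or fastopen request state.
 */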
1931 void tcp_v4_destroy_sock(struct sock *sk)
1932 {
1933 	struct tcp_sock *tp = tcp_sk(sk);
1934 
1935 	tcp_clear_xmit_timers(sk);
1936 
1937 	tcp_cleanup_congestion_control(sk);
1938 
1939 	/* Clean up the write buffer. */
1940 	tcp_write_queue_purge(sk);
1941 
1942 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1943 	__skb_queue_purge(&tp->out_of_order_queue);
1944 
1945 #ifdef CONFIG_TCP_MD5SIG
1946 	/* Clean up the MD5 key list, if any */
1947 	if (tp->md5sig_info) {
1948 		tcp_clear_md5_list(sk);
1949 		kfree_rcu(tp->md5sig_info, rcu);
1950 		tp->md5sig_info = NULL;
1951 	}
1952 #endif
1953 
1954 #ifdef CONFIG_NET_DMA
1955 	/* Cleans up our sk_async_wait_queue */
1956 	__skb_queue_purge(&sk->sk_async_wait_queue);
1957 #endif
1958 
1959 	/* Clean up the prequeue; it really should be empty. */
1960 	__skb_queue_purge(&tp->ucopy.prequeue);
1961 
1962 	/* Clean up a referenced TCP bind bucket. */
1963 	if (inet_csk(sk)->icsk_bind_hash)
1964 		inet_put_port(sk);
1965 
1966 	/*
1967 	 * If a cached sendmsg page exists, toss it.
1968 	 */
1969 	if (sk->sk_sndmsg_page) {
1970 		__free_page(sk->sk_sndmsg_page);
1971 		sk->sk_sndmsg_page = NULL;
1972 	}
1973 
1974 	/* TCP Cookie Transactions */
1975 	if (tp->cookie_values != NULL) {
1976 		kref_put(&tp->cookie_values->kref,
1977 			 tcp_cookie_values_release);
1978 		tp->cookie_values = NULL;
1979 	}
1980 
1981 	/* If socket is aborted during connect operation */
1982 	tcp_free_fastopen_req(tp);
1983 
1984 	sk_sockets_allocated_dec(sk);
1985 	sock_release_memcg(sk);
1986 }
1987 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1988 
1989 #ifdef CONFIG_PROC_FS
1990 /* Proc filesystem TCP sock list dumping. */
1991 
1992 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1993 {
1994 	return hlist_nulls_empty(head) ? NULL :
1995 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1996 }
1997 
1998 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1999 {
2000 	return !is_a_nulls(tw->tw_node.next) ?
2001 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2002 }
2003 
2004 /*
2005  * Get the next listener socket following cur.  If cur is NULL, get the
2006  * first socket starting from the bucket given in st->bucket; when
2007  * st->bucket is zero the very first socket in the hash table is returned.
2008  */
2009 static void *listening_get_next(struct seq_file *seq, void *cur)
2010 {
2011 	struct inet_connection_sock *icsk;
2012 	struct hlist_nulls_node *node;
2013 	struct sock *sk = cur;
2014 	struct inet_listen_hashbucket *ilb;
2015 	struct tcp_iter_state *st = seq->private;
2016 	struct net *net = seq_file_net(seq);
2017 
2018 	if (!sk) {
2019 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2020 		spin_lock_bh(&ilb->lock);
2021 		sk = sk_nulls_head(&ilb->head);
2022 		st->offset = 0;
2023 		goto get_sk;
2024 	}
2025 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
2026 	++st->num;
2027 	++st->offset;
2028 
2029 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2030 		struct request_sock *req = cur;
2031 
2032 		icsk = inet_csk(st->syn_wait_sk);
2033 		req = req->dl_next;
2034 		while (1) {
2035 			while (req) {
2036 				if (req->rsk_ops->family == st->family) {
2037 					cur = req;
2038 					goto out;
2039 				}
2040 				req = req->dl_next;
2041 			}
2042 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2043 				break;
2044 get_req:
2045 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2046 		}
2047 		sk	  = sk_nulls_next(st->syn_wait_sk);
2048 		st->state = TCP_SEQ_STATE_LISTENING;
2049 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2050 	} else {
2051 		icsk = inet_csk(sk);
2052 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2053 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2054 			goto start_req;
2055 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2056 		sk = sk_nulls_next(sk);
2057 	}
2058 get_sk:
2059 	sk_nulls_for_each_from(sk, node) {
2060 		if (!net_eq(sock_net(sk), net))
2061 			continue;
2062 		if (sk->sk_family == st->family) {
2063 			cur = sk;
2064 			goto out;
2065 		}
2066 		icsk = inet_csk(sk);
2067 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2068 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2069 start_req:
2070 			st->uid		= sock_i_uid(sk);
2071 			st->syn_wait_sk = sk;
2072 			st->state	= TCP_SEQ_STATE_OPENREQ;
2073 			st->sbucket	= 0;
2074 			goto get_req;
2075 		}
2076 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2077 	}
2078 	spin_unlock_bh(&ilb->lock);
2079 	st->offset = 0;
2080 	if (++st->bucket < INET_LHTABLE_SIZE) {
2081 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2082 		spin_lock_bh(&ilb->lock);
2083 		sk = sk_nulls_head(&ilb->head);
2084 		goto get_sk;
2085 	}
2086 	cur = NULL;
2087 out:
2088 	return cur;
2089 }
2090 
2091 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2092 {
2093 	struct tcp_iter_state *st = seq->private;
2094 	void *rc;
2095 
2096 	st->bucket = 0;
2097 	st->offset = 0;
2098 	rc = listening_get_next(seq, NULL);
2099 
2100 	while (rc && *pos) {
2101 		rc = listening_get_next(seq, rc);
2102 		--*pos;
2103 	}
2104 	return rc;
2105 }
2106 
2107 static inline bool empty_bucket(struct tcp_iter_state *st)
2108 {
2109 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2110 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2111 }
2112 
2113 /*
2114  * Get first established socket starting from bucket given in st->bucket.
2115  * If st->bucket is zero, the very first socket in the hash is returned.
2116  */
2117 static void *established_get_first(struct seq_file *seq)
2118 {
2119 	struct tcp_iter_state *st = seq->private;
2120 	struct net *net = seq_file_net(seq);
2121 	void *rc = NULL;
2122 
2123 	st->offset = 0;
2124 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2125 		struct sock *sk;
2126 		struct hlist_nulls_node *node;
2127 		struct inet_timewait_sock *tw;
2128 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2129 
2130 		/* Lockless fast path for the common case of empty buckets */
2131 		if (empty_bucket(st))
2132 			continue;
2133 
2134 		spin_lock_bh(lock);
2135 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2136 			if (sk->sk_family != st->family ||
2137 			    !net_eq(sock_net(sk), net)) {
2138 				continue;
2139 			}
2140 			rc = sk;
2141 			goto out;
2142 		}
2143 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2144 		inet_twsk_for_each(tw, node,
2145 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2146 			if (tw->tw_family != st->family ||
2147 			    !net_eq(twsk_net(tw), net)) {
2148 				continue;
2149 			}
2150 			rc = tw;
2151 			goto out;
2152 		}
2153 		spin_unlock_bh(lock);
2154 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2155 	}
2156 out:
2157 	return rc;
2158 }
2159 
2160 static void *established_get_next(struct seq_file *seq, void *cur)
2161 {
2162 	struct sock *sk = cur;
2163 	struct inet_timewait_sock *tw;
2164 	struct hlist_nulls_node *node;
2165 	struct tcp_iter_state *st = seq->private;
2166 	struct net *net = seq_file_net(seq);
2167 
2168 	++st->num;
2169 	++st->offset;
2170 
2171 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2172 		tw = cur;
2173 		tw = tw_next(tw);
2174 get_tw:
2175 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2176 			tw = tw_next(tw);
2177 		}
2178 		if (tw) {
2179 			cur = tw;
2180 			goto out;
2181 		}
2182 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2183 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2184 
2185 		/* Look for the next non-empty bucket */
2186 		st->offset = 0;
2187 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2188 				empty_bucket(st))
2189 			;
2190 		if (st->bucket > tcp_hashinfo.ehash_mask)
2191 			return NULL;
2192 
2193 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2194 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2195 	} else
2196 		sk = sk_nulls_next(sk);
2197 
2198 	sk_nulls_for_each_from(sk, node) {
2199 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2200 			goto found;
2201 	}
2202 
2203 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2204 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2205 	goto get_tw;
2206 found:
2207 	cur = sk;
2208 out:
2209 	return cur;
2210 }
2211 
2212 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2213 {
2214 	struct tcp_iter_state *st = seq->private;
2215 	void *rc;
2216 
2217 	st->bucket = 0;
2218 	rc = established_get_first(seq);
2219 
2220 	while (rc && pos) {
2221 		rc = established_get_next(seq, rc);
2222 		--pos;
2223 	}
2224 	return rc;
2225 }
2226 
2227 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2228 {
2229 	void *rc;
2230 	struct tcp_iter_state *st = seq->private;
2231 
2232 	st->state = TCP_SEQ_STATE_LISTENING;
2233 	rc	  = listening_get_idx(seq, &pos);
2234 
2235 	if (!rc) {
2236 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2237 		rc	  = established_get_idx(seq, pos);
2238 	}
2239 
2240 	return rc;
2241 }
2242 
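/* Resume iteration at the bucket and offset recorded in the iterator
 * state, so consecutive reads of the seq_file do not have to restart
 * from bucket 0.
 */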
2243 static void *tcp_seek_last_pos(struct seq_file *seq)
2244 {
2245 	struct tcp_iter_state *st = seq->private;
2246 	int offset = st->offset;
2247 	int orig_num = st->num;
2248 	void *rc = NULL;
2249 
2250 	switch (st->state) {
2251 	case TCP_SEQ_STATE_OPENREQ:
2252 	case TCP_SEQ_STATE_LISTENING:
2253 		if (st->bucket >= INET_LHTABLE_SIZE)
2254 			break;
2255 		st->state = TCP_SEQ_STATE_LISTENING;
2256 		rc = listening_get_next(seq, NULL);
2257 		while (offset-- && rc)
2258 			rc = listening_get_next(seq, rc);
2259 		if (rc)
2260 			break;
2261 		st->bucket = 0;
2262 		/* Fallthrough */
2263 	case TCP_SEQ_STATE_ESTABLISHED:
2264 	case TCP_SEQ_STATE_TIME_WAIT:
2265 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2266 		if (st->bucket > tcp_hashinfo.ehash_mask)
2267 			break;
2268 		rc = established_get_first(seq);
2269 		while (offset-- && rc)
2270 			rc = established_get_next(seq, rc);
2271 	}
2272 
2273 	st->num = orig_num;
2274 
2275 	return rc;
2276 }
2277 
2278 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2279 {
2280 	struct tcp_iter_state *st = seq->private;
2281 	void *rc;
2282 
2283 	if (*pos && *pos == st->last_pos) {
2284 		rc = tcp_seek_last_pos(seq);
2285 		if (rc)
2286 			goto out;
2287 	}
2288 
2289 	st->state = TCP_SEQ_STATE_LISTENING;
2290 	st->num = 0;
2291 	st->bucket = 0;
2292 	st->offset = 0;
2293 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2294 
2295 out:
2296 	st->last_pos = *pos;
2297 	return rc;
2298 }
2299 
2300 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2301 {
2302 	struct tcp_iter_state *st = seq->private;
2303 	void *rc = NULL;
2304 
2305 	if (v == SEQ_START_TOKEN) {
2306 		rc = tcp_get_idx(seq, 0);
2307 		goto out;
2308 	}
2309 
2310 	switch (st->state) {
2311 	case TCP_SEQ_STATE_OPENREQ:
2312 	case TCP_SEQ_STATE_LISTENING:
2313 		rc = listening_get_next(seq, v);
2314 		if (!rc) {
2315 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2316 			st->bucket = 0;
2317 			st->offset = 0;
2318 			rc	  = established_get_first(seq);
2319 		}
2320 		break;
2321 	case TCP_SEQ_STATE_ESTABLISHED:
2322 	case TCP_SEQ_STATE_TIME_WAIT:
2323 		rc = established_get_next(seq, v);
2324 		break;
2325 	}
2326 out:
2327 	++*pos;
2328 	st->last_pos = *pos;
2329 	return rc;
2330 }
2331 
2332 static void tcp_seq_stop(struct seq_file *seq, void *v)
2333 {
2334 	struct tcp_iter_state *st = seq->private;
2335 
2336 	switch (st->state) {
2337 	case TCP_SEQ_STATE_OPENREQ:
2338 		if (v) {
2339 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2340 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2341 		}
2342 	case TCP_SEQ_STATE_LISTENING:
2343 		if (v != SEQ_START_TOKEN)
2344 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2345 		break;
2346 	case TCP_SEQ_STATE_TIME_WAIT:
2347 	case TCP_SEQ_STATE_ESTABLISHED:
2348 		if (v)
2349 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2350 		break;
2351 	}
2352 }
2353 
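/* Common open handler for the TCP proc files: allocate the iterator
 * state via seq_open_net() and record the address family from the
 * tcp_seq_afinfo attached to the proc entry.
 */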
2354 int tcp_seq_open(struct inode *inode, struct file *file)
2355 {
2356 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2357 	struct tcp_iter_state *s;
2358 	int err;
2359 
2360 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2361 			  sizeof(struct tcp_iter_state));
2362 	if (err < 0)
2363 		return err;
2364 
2365 	s = ((struct seq_file *)file->private_data)->private;
2366 	s->family		= afinfo->family;
2367 	s->last_pos 		= 0;
2368 	return 0;
2369 }
2370 EXPORT_SYMBOL(tcp_seq_open);
2371 
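/* Wire up the common start/next/stop iterators and create the per-net
 * proc entry; the caller supplies ->show (e.g. tcp4_seq_show below).
 */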
2372 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2373 {
2374 	int rc = 0;
2375 	struct proc_dir_entry *p;
2376 
2377 	afinfo->seq_ops.start		= tcp_seq_start;
2378 	afinfo->seq_ops.next		= tcp_seq_next;
2379 	afinfo->seq_ops.stop		= tcp_seq_stop;
2380 
2381 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2382 			     afinfo->seq_fops, afinfo);
2383 	if (!p)
2384 		rc = -ENOMEM;
2385 	return rc;
2386 }
2387 EXPORT_SYMBOL(tcp_proc_register);
2388 
2389 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2390 {
2391 	proc_net_remove(net, afinfo->name);
2392 }
2393 EXPORT_SYMBOL(tcp_proc_unregister);
2394 
2395 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2396 			 struct seq_file *f, int i, int uid, int *len)
2397 {
2398 	const struct inet_request_sock *ireq = inet_rsk(req);
2399 	int ttd = req->expires - jiffies;
2400 
2401 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2402 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2403 		i,
2404 		ireq->loc_addr,
2405 		ntohs(inet_sk(sk)->inet_sport),
2406 		ireq->rmt_addr,
2407 		ntohs(ireq->rmt_port),
2408 		TCP_SYN_RECV,
2409 		0, 0, /* could print option size, but that is af dependent. */
2410 		1,    /* timers active (only the expire timer) */
2411 		jiffies_to_clock_t(ttd),
2412 		req->retrans,
2413 		uid,
2414 		0,  /* non standard timer */
2415 		0, /* open_requests have no inode */
2416 		atomic_read(&sk->sk_refcnt),
2417 		req,
2418 		len);
2419 }
2420 
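/* Emit one /proc line for a listening or established socket.  The columns
 * follow the header printed in tcp4_seq_show(): local and remote
 * address:port in hex, state, tx_queue:rx_queue, a timer code and its
 * expiry (1 retransmit, 2 sk_timer, 4 zero-window probe, 0 none),
 * retransmit count, uid, probe count, inode, refcount, the socket
 * address and a few TCP internals (rto, ato, cwnd, ssthresh).
 */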
2421 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2422 {
2423 	int timer_active;
2424 	unsigned long timer_expires;
2425 	const struct tcp_sock *tp = tcp_sk(sk);
2426 	const struct inet_connection_sock *icsk = inet_csk(sk);
2427 	const struct inet_sock *inet = inet_sk(sk);
2428 	__be32 dest = inet->inet_daddr;
2429 	__be32 src = inet->inet_rcv_saddr;
2430 	__u16 destp = ntohs(inet->inet_dport);
2431 	__u16 srcp = ntohs(inet->inet_sport);
2432 	int rx_queue;
2433 
2434 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2435 		timer_active	= 1;
2436 		timer_expires	= icsk->icsk_timeout;
2437 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2438 		timer_active	= 4;
2439 		timer_expires	= icsk->icsk_timeout;
2440 	} else if (timer_pending(&sk->sk_timer)) {
2441 		timer_active	= 2;
2442 		timer_expires	= sk->sk_timer.expires;
2443 	} else {
2444 		timer_active	= 0;
2445 		timer_expires = jiffies;
2446 	}
2447 
2448 	if (sk->sk_state == TCP_LISTEN)
2449 		rx_queue = sk->sk_ack_backlog;
2450 	else
2451 		/*
2452 		 * Because we don't lock the socket, we might find a transient negative value.
2453 		 */
2454 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2455 
2456 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2457 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2458 		i, src, srcp, dest, destp, sk->sk_state,
2459 		tp->write_seq - tp->snd_una,
2460 		rx_queue,
2461 		timer_active,
2462 		jiffies_to_clock_t(timer_expires - jiffies),
2463 		icsk->icsk_retransmits,
2464 		sock_i_uid(sk),
2465 		icsk->icsk_probes_out,
2466 		sock_i_ino(sk),
2467 		atomic_read(&sk->sk_refcnt), sk,
2468 		jiffies_to_clock_t(icsk->icsk_rto),
2469 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2470 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2471 		tp->snd_cwnd,
2472 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2473 		len);
2474 }
2475 
2476 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2477 			       struct seq_file *f, int i, int *len)
2478 {
2479 	__be32 dest, src;
2480 	__u16 destp, srcp;
2481 	int ttd = tw->tw_ttd - jiffies;
2482 
2483 	if (ttd < 0)
2484 		ttd = 0;
2485 
2486 	dest  = tw->tw_daddr;
2487 	src   = tw->tw_rcv_saddr;
2488 	destp = ntohs(tw->tw_dport);
2489 	srcp  = ntohs(tw->tw_sport);
2490 
2491 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2492 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2493 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2494 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2495 		atomic_read(&tw->tw_refcnt), tw, len);
2496 }
2497 
2498 #define TMPSZ 150
2499 
2500 static int tcp4_seq_show(struct seq_file *seq, void *v)
2501 {
2502 	struct tcp_iter_state *st;
2503 	int len;
2504 
2505 	if (v == SEQ_START_TOKEN) {
2506 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2507 			   "  sl  local_address rem_address   st tx_queue "
2508 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2509 			   "inode");
2510 		goto out;
2511 	}
2512 	st = seq->private;
2513 
2514 	switch (st->state) {
2515 	case TCP_SEQ_STATE_LISTENING:
2516 	case TCP_SEQ_STATE_ESTABLISHED:
2517 		get_tcp4_sock(v, seq, st->num, &len);
2518 		break;
2519 	case TCP_SEQ_STATE_OPENREQ:
2520 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2521 		break;
2522 	case TCP_SEQ_STATE_TIME_WAIT:
2523 		get_timewait4_sock(v, seq, st->num, &len);
2524 		break;
2525 	}
2526 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2527 out:
2528 	return 0;
2529 }
2530 
2531 static const struct file_operations tcp_afinfo_seq_fops = {
2532 	.owner   = THIS_MODULE,
2533 	.open    = tcp_seq_open,
2534 	.read    = seq_read,
2535 	.llseek  = seq_lseek,
2536 	.release = seq_release_net
2537 };
2538 
2539 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2540 	.name		= "tcp",
2541 	.family		= AF_INET,
2542 	.seq_fops	= &tcp_afinfo_seq_fops,
2543 	.seq_ops	= {
2544 		.show		= tcp4_seq_show,
2545 	},
2546 };
2547 
2548 static int __net_init tcp4_proc_init_net(struct net *net)
2549 {
2550 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2551 }
2552 
2553 static void __net_exit tcp4_proc_exit_net(struct net *net)
2554 {
2555 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2556 }
2557 
2558 static struct pernet_operations tcp4_net_ops = {
2559 	.init = tcp4_proc_init_net,
2560 	.exit = tcp4_proc_exit_net,
2561 };
2562 
2563 int __init tcp4_proc_init(void)
2564 {
2565 	return register_pernet_subsys(&tcp4_net_ops);
2566 }
2567 
2568 void tcp4_proc_exit(void)
2569 {
2570 	unregister_pernet_subsys(&tcp4_net_ops);
2571 }
2572 #endif /* CONFIG_PROC_FS */
2573 
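/* GRO entry points for TCP over IPv4: tcp4_gro_receive() verifies the
 * pseudo-header checksum when the device supplied CHECKSUM_COMPLETE, and
 * flushes segments that arrive with CHECKSUM_NONE or fail that check;
 * tcp4_gro_complete() recomputes the pseudo-header checksum and marks
 * the merged skb as SKB_GSO_TCPV4 before handing it to the generic TCP
 * GRO code.
 */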
2574 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2575 {
2576 	const struct iphdr *iph = skb_gro_network_header(skb);
2577 
2578 	switch (skb->ip_summed) {
2579 	case CHECKSUM_COMPLETE:
2580 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2581 				  skb->csum)) {
2582 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2583 			break;
2584 		}
2585 
2586 		/* fall through */
2587 	case CHECKSUM_NONE:
2588 		NAPI_GRO_CB(skb)->flush = 1;
2589 		return NULL;
2590 	}
2591 
2592 	return tcp_gro_receive(head, skb);
2593 }
2594 
2595 int tcp4_gro_complete(struct sk_buff *skb)
2596 {
2597 	const struct iphdr *iph = ip_hdr(skb);
2598 	struct tcphdr *th = tcp_hdr(skb);
2599 
2600 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2601 				  iph->saddr, iph->daddr, 0);
2602 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2603 
2604 	return tcp_gro_complete(skb);
2605 }
2606 
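/* The protocol descriptor that plugs the functions above (plus the
 * generic tcp_* helpers) into the AF_INET socket layer for SOCK_STREAM
 * sockets.
 */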
2607 struct proto tcp_prot = {
2608 	.name			= "TCP",
2609 	.owner			= THIS_MODULE,
2610 	.close			= tcp_close,
2611 	.connect		= tcp_v4_connect,
2612 	.disconnect		= tcp_disconnect,
2613 	.accept			= inet_csk_accept,
2614 	.ioctl			= tcp_ioctl,
2615 	.init			= tcp_v4_init_sock,
2616 	.destroy		= tcp_v4_destroy_sock,
2617 	.shutdown		= tcp_shutdown,
2618 	.setsockopt		= tcp_setsockopt,
2619 	.getsockopt		= tcp_getsockopt,
2620 	.recvmsg		= tcp_recvmsg,
2621 	.sendmsg		= tcp_sendmsg,
2622 	.sendpage		= tcp_sendpage,
2623 	.backlog_rcv		= tcp_v4_do_rcv,
2624 	.release_cb		= tcp_release_cb,
2625 	.mtu_reduced		= tcp_v4_mtu_reduced,
2626 	.hash			= inet_hash,
2627 	.unhash			= inet_unhash,
2628 	.get_port		= inet_csk_get_port,
2629 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2630 	.sockets_allocated	= &tcp_sockets_allocated,
2631 	.orphan_count		= &tcp_orphan_count,
2632 	.memory_allocated	= &tcp_memory_allocated,
2633 	.memory_pressure	= &tcp_memory_pressure,
2634 	.sysctl_wmem		= sysctl_tcp_wmem,
2635 	.sysctl_rmem		= sysctl_tcp_rmem,
2636 	.max_header		= MAX_TCP_HEADER,
2637 	.obj_size		= sizeof(struct tcp_sock),
2638 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2639 	.twsk_prot		= &tcp_timewait_sock_ops,
2640 	.rsk_prot		= &tcp_request_sock_ops,
2641 	.h.hashinfo		= &tcp_hashinfo,
2642 	.no_autobind		= true,
2643 #ifdef CONFIG_COMPAT
2644 	.compat_setsockopt	= compat_tcp_setsockopt,
2645 	.compat_getsockopt	= compat_tcp_getsockopt,
2646 #endif
2647 #ifdef CONFIG_MEMCG_KMEM
2648 	.init_cgroup		= tcp_init_cgroup,
2649 	.destroy_cgroup		= tcp_destroy_cgroup,
2650 	.proto_cgroup		= tcp_proto_cgroup,
2651 #endif
2652 };
2653 EXPORT_SYMBOL(tcp_prot);
2654 
2655 static int __net_init tcp_sk_init(struct net *net)
2656 {
2657 	return 0;
2658 }
2659 
2660 static void __net_exit tcp_sk_exit(struct net *net)
2661 {
2662 }
2663 
2664 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2665 {
2666 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2667 }
2668 
2669 static struct pernet_operations __net_initdata tcp_sk_ops = {
2670        .init	   = tcp_sk_init,
2671        .exit	   = tcp_sk_exit,
2672        .exit_batch = tcp_sk_exit_batch,
2673 };
2674 
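/* Called once during boot: set up the TCP hash tables and register the
 * per-namespace hooks; failure here is fatal.
 */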
2675 void __init tcp_v4_init(void)
2676 {
2677 	inet_hashinfo_init(&tcp_hashinfo);
2678 	if (register_pernet_subsys(&tcp_sk_ops))
2679 		panic("Failed to create the TCP control socket.\n");
2680 }
2681