xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision 63dc02bd)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after a
45  *					year-long coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78 
79 #include <linux/inet.h>
80 #include <linux/ipv6.h>
81 #include <linux/stddef.h>
82 #include <linux/proc_fs.h>
83 #include <linux/seq_file.h>
84 
85 #include <linux/crypto.h>
86 #include <linux/scatterlist.h>
87 
88 int sysctl_tcp_tw_reuse __read_mostly;
89 int sysctl_tcp_low_latency __read_mostly;
90 EXPORT_SYMBOL(sysctl_tcp_low_latency);
91 
92 
93 #ifdef CONFIG_TCP_MD5SIG
94 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96 #endif
97 
98 struct inet_hashinfo tcp_hashinfo;
99 EXPORT_SYMBOL(tcp_hashinfo);
100 
101 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102 {
103 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 					  ip_hdr(skb)->saddr,
105 					  tcp_hdr(skb)->dest,
106 					  tcp_hdr(skb)->source);
107 }
108 
109 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110 {
111 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 	struct tcp_sock *tp = tcp_sk(sk);
113 
114 	/* With PAWS, it is safe from the viewpoint
115 	   of data integrity. Even without PAWS it is safe provided sequence
116 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117 
118 	   Actually, the idea is close to VJ's one: only the timestamp cache is
119 	   held not per host but per port pair, and the TW bucket is used as the
120 	   state holder.
121 
122 	   If the TW bucket has already been destroyed we fall back to VJ's scheme
123 	   and use the initial timestamp retrieved from the peer table.
124 	 */
125 	if (tcptw->tw_ts_recent_stamp &&
126 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
127 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
128 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 		if (tp->write_seq == 0)
130 			tp->write_seq = 1;
131 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
132 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 		sock_hold(sktw);
134 		return 1;
135 	}
136 
137 	return 0;
138 }
139 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
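
/*
 * Illustrative sketch, not part of the original file: the
 * sysctl_tcp_tw_reuse flag tested in tcp_twsk_unique() above is exposed
 * to userspace as /proc/sys/net/ipv4/tcp_tw_reuse.  A minimal userspace
 * helper for flipping it might look like the following (the helper name
 * is hypothetical).
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <stdio.h>

/* Allow new outgoing connections to reuse TIME-WAIT port pairs. */
static int enable_tcp_tw_reuse(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "w");

	if (!f)
		return -1;
	fputs("1\n", f);
	fclose(f);
	return 0;
}
#endif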
140 
141 /* This will initiate an outgoing connection. */
142 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143 {
144 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
145 	struct inet_sock *inet = inet_sk(sk);
146 	struct tcp_sock *tp = tcp_sk(sk);
147 	__be16 orig_sport, orig_dport;
148 	__be32 daddr, nexthop;
149 	struct flowi4 *fl4;
150 	struct rtable *rt;
151 	int err;
152 	struct ip_options_rcu *inet_opt;
153 
154 	if (addr_len < sizeof(struct sockaddr_in))
155 		return -EINVAL;
156 
157 	if (usin->sin_family != AF_INET)
158 		return -EAFNOSUPPORT;
159 
160 	nexthop = daddr = usin->sin_addr.s_addr;
161 	inet_opt = rcu_dereference_protected(inet->inet_opt,
162 					     sock_owned_by_user(sk));
163 	if (inet_opt && inet_opt->opt.srr) {
164 		if (!daddr)
165 			return -EINVAL;
166 		nexthop = inet_opt->opt.faddr;
167 	}
168 
169 	orig_sport = inet->inet_sport;
170 	orig_dport = usin->sin_port;
171 	fl4 = &inet->cork.fl.u.ip4;
172 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
173 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 			      IPPROTO_TCP,
175 			      orig_sport, orig_dport, sk, true);
176 	if (IS_ERR(rt)) {
177 		err = PTR_ERR(rt);
178 		if (err == -ENETUNREACH)
179 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
180 		return err;
181 	}
182 
183 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
184 		ip_rt_put(rt);
185 		return -ENETUNREACH;
186 	}
187 
188 	if (!inet_opt || !inet_opt->opt.srr)
189 		daddr = fl4->daddr;
190 
191 	if (!inet->inet_saddr)
192 		inet->inet_saddr = fl4->saddr;
193 	inet->inet_rcv_saddr = inet->inet_saddr;
194 
195 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
196 		/* Reset inherited state */
197 		tp->rx_opt.ts_recent	   = 0;
198 		tp->rx_opt.ts_recent_stamp = 0;
199 		tp->write_seq		   = 0;
200 	}
201 
202 	if (tcp_death_row.sysctl_tw_recycle &&
203 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
204 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
205 		/*
206 		 * VJ's idea. We save last timestamp seen from
207 		 * the destination in peer table, when entering state
208 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
209 		 * when trying new connection.
210 		 */
211 		if (peer) {
212 			inet_peer_refcheck(peer);
213 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
214 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
215 				tp->rx_opt.ts_recent = peer->tcp_ts;
216 			}
217 		}
218 	}
219 
220 	inet->inet_dport = usin->sin_port;
221 	inet->inet_daddr = daddr;
222 
223 	inet_csk(sk)->icsk_ext_hdr_len = 0;
224 	if (inet_opt)
225 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
226 
227 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
228 
229 	/* Socket identity is still unknown (sport may be zero).
230 	 * However we set the state to SYN-SENT and, without releasing the
231 	 * socket lock, select a source port, enter ourselves into the hash
232 	 * tables and complete initialization after this.
233 	 */
234 	tcp_set_state(sk, TCP_SYN_SENT);
235 	err = inet_hash_connect(&tcp_death_row, sk);
236 	if (err)
237 		goto failure;
238 
239 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
240 			       inet->inet_sport, inet->inet_dport, sk);
241 	if (IS_ERR(rt)) {
242 		err = PTR_ERR(rt);
243 		rt = NULL;
244 		goto failure;
245 	}
246 	/* OK, now commit destination to socket.  */
247 	sk->sk_gso_type = SKB_GSO_TCPV4;
248 	sk_setup_caps(sk, &rt->dst);
249 
250 	if (!tp->write_seq)
251 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
252 							   inet->inet_daddr,
253 							   inet->inet_sport,
254 							   usin->sin_port);
255 
256 	inet->inet_id = tp->write_seq ^ jiffies;
257 
258 	err = tcp_connect(sk);
259 	rt = NULL;
260 	if (err)
261 		goto failure;
262 
263 	return 0;
264 
265 failure:
266 	/*
267 	 * This unhashes the socket and releases the local port,
268 	 * if necessary.
269 	 */
270 	tcp_set_state(sk, TCP_CLOSE);
271 	ip_rt_put(rt);
272 	sk->sk_route_caps = 0;
273 	inet->inet_dport = 0;
274 	return err;
275 }
276 EXPORT_SYMBOL(tcp_v4_connect);
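
/*
 * Illustrative userspace counterpart (an assumption added for clarity,
 * not part of the original file): a plain connect() on an AF_INET
 * stream socket is what ultimately reaches tcp_v4_connect() above.  The
 * helper name below is hypothetical.
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

static int tcp_dial(const char *ip, unsigned short port)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	sin.sin_port = htons(port);
	if (inet_pton(AF_INET, ip, &sin.sin_addr) != 1) {
		close(fd);
		return -1;
	}
	/* Kernel side: tcp_v4_connect() picks the route, source port and ISN. */
	if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
#endif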
277 
278 /*
279  * This routine does path mtu discovery as defined in RFC1191.
280  */
281 static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
282 {
283 	struct dst_entry *dst;
284 	struct inet_sock *inet = inet_sk(sk);
285 
286 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
287 	 * sent out by Linux are always < 576 bytes so they should go through
288 	 * unfragmented).
289 	 */
290 	if (sk->sk_state == TCP_LISTEN)
291 		return;
292 
293 	/* We don't check in the dst entry if pmtu discovery is forbidden
294 	 * on this route. We just assume that no packet-too-big packets
295 	 * are sent back when pmtu discovery is not active.
296 	 * There is a small race when the user changes this flag in the
297 	 * route, but I think that's acceptable.
298 	 */
299 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
300 		return;
301 
302 	dst->ops->update_pmtu(dst, mtu);
303 
304 	/* Something is about to go wrong... Remember the soft error
305 	 * in case this connection is not able to recover.
306 	 */
307 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
308 		sk->sk_err_soft = EMSGSIZE;
309 
310 	mtu = dst_mtu(dst);
311 
312 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
313 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
314 		tcp_sync_mss(sk, mtu);
315 
316 		/* Resend the TCP packet because it's
317 		 * clear that the old packet has been
318 		 * dropped. This is the new "fast" path mtu
319 		 * discovery.
320 		 */
321 		tcp_simple_retransmit(sk);
322 	} /* else let the usual retransmit timer handle it */
323 }
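
/*
 * Illustrative sketch, not part of the original file: once
 * do_pmtu_discovery() has lowered the cached path MTU, a connected
 * userspace socket can read the current value back with the IP_MTU
 * socket option (assuming a Linux userspace; the helper name is
 * hypothetical).
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <netinet/in.h>
#include <sys/socket.h>

static int current_path_mtu(int connected_fd)
{
	int mtu = 0;
	socklen_t len = sizeof(mtu);

	/* Reflects dst_mtu() after any ICMP FRAG_NEEDED processing. */
	if (getsockopt(connected_fd, IPPROTO_IP, IP_MTU, &mtu, &len) < 0)
		return -1;
	return mtu;
}
#endif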
324 
325 /*
326  * This routine is called by the ICMP module when it gets some
327  * sort of error condition.  If err < 0 then the socket should
328  * be closed and the error returned to the user.  If err > 0
329  * it's just the icmp type << 8 | icmp code.  After adjustment, the
330  * header points to the first 8 bytes of the tcp header.  We need
331  * to find the appropriate port.
332  *
333  * The locking strategy used here is very "optimistic". When
334  * someone else accesses the socket the ICMP is just dropped
335  * and for some paths there is no check at all.
336  * A more general error queue to queue errors for later handling
337  * is probably better.
338  *
339  */
340 
341 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
342 {
343 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
344 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
345 	struct inet_connection_sock *icsk;
346 	struct tcp_sock *tp;
347 	struct inet_sock *inet;
348 	const int type = icmp_hdr(icmp_skb)->type;
349 	const int code = icmp_hdr(icmp_skb)->code;
350 	struct sock *sk;
351 	struct sk_buff *skb;
352 	__u32 seq;
353 	__u32 remaining;
354 	int err;
355 	struct net *net = dev_net(icmp_skb->dev);
356 
357 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
358 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
359 		return;
360 	}
361 
362 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
363 			iph->saddr, th->source, inet_iif(icmp_skb));
364 	if (!sk) {
365 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
366 		return;
367 	}
368 	if (sk->sk_state == TCP_TIME_WAIT) {
369 		inet_twsk_put(inet_twsk(sk));
370 		return;
371 	}
372 
373 	bh_lock_sock(sk);
374 	/* If too many ICMPs get dropped on busy
375 	 * servers this needs to be solved differently.
376 	 */
377 	if (sock_owned_by_user(sk))
378 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
379 
380 	if (sk->sk_state == TCP_CLOSE)
381 		goto out;
382 
383 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
384 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
385 		goto out;
386 	}
387 
388 	icsk = inet_csk(sk);
389 	tp = tcp_sk(sk);
390 	seq = ntohl(th->seq);
391 	if (sk->sk_state != TCP_LISTEN &&
392 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
393 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
394 		goto out;
395 	}
396 
397 	switch (type) {
398 	case ICMP_SOURCE_QUENCH:
399 		/* Just silently ignore these. */
400 		goto out;
401 	case ICMP_PARAMETERPROB:
402 		err = EPROTO;
403 		break;
404 	case ICMP_DEST_UNREACH:
405 		if (code > NR_ICMP_UNREACH)
406 			goto out;
407 
408 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
409 			if (!sock_owned_by_user(sk))
410 				do_pmtu_discovery(sk, iph, info);
411 			goto out;
412 		}
413 
414 		err = icmp_err_convert[code].errno;
415 		/* check if icmp_skb allows revert of backoff
416 		 * (see draft-zimmermann-tcp-lcd) */
417 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
418 			break;
419 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
420 		    !icsk->icsk_backoff)
421 			break;
422 
423 		if (sock_owned_by_user(sk))
424 			break;
425 
426 		icsk->icsk_backoff--;
427 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
428 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
429 		tcp_bound_rto(sk);
430 
431 		skb = tcp_write_queue_head(sk);
432 		BUG_ON(!skb);
433 
434 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
435 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
436 
437 		if (remaining) {
438 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
439 						  remaining, TCP_RTO_MAX);
440 		} else {
441 			/* RTO revert clocked out retransmission.
442 			 * Will retransmit now */
443 			tcp_retransmit_timer(sk);
444 		}
445 
446 		break;
447 	case ICMP_TIME_EXCEEDED:
448 		err = EHOSTUNREACH;
449 		break;
450 	default:
451 		goto out;
452 	}
453 
454 	switch (sk->sk_state) {
455 		struct request_sock *req, **prev;
456 	case TCP_LISTEN:
457 		if (sock_owned_by_user(sk))
458 			goto out;
459 
460 		req = inet_csk_search_req(sk, &prev, th->dest,
461 					  iph->daddr, iph->saddr);
462 		if (!req)
463 			goto out;
464 
465 		/* ICMPs are not backlogged, hence we cannot get
466 		   an established socket here.
467 		 */
468 		WARN_ON(req->sk);
469 
470 		if (seq != tcp_rsk(req)->snt_isn) {
471 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
472 			goto out;
473 		}
474 
475 		/*
476 		 * Still in SYN_RECV, just remove it silently.
477 		 * There is no good way to pass the error to the newly
478 		 * created socket, and POSIX does not want network
479 		 * errors returned from accept().
480 		 */
481 		inet_csk_reqsk_queue_drop(sk, req, prev);
482 		goto out;
483 
484 	case TCP_SYN_SENT:
485 	case TCP_SYN_RECV:  /* Cannot happen.
486 			       Well, it can, e.g. if SYNs crossed.
487 			     */
488 		if (!sock_owned_by_user(sk)) {
489 			sk->sk_err = err;
490 
491 			sk->sk_error_report(sk);
492 
493 			tcp_done(sk);
494 		} else {
495 			sk->sk_err_soft = err;
496 		}
497 		goto out;
498 	}
499 
500 	/* If we've already connected we will keep trying
501 	 * until we time out, or the user gives up.
502 	 *
503 	 * rfc1122 4.2.3.9 allows us to consider as hard errors
504 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
505 	 * but it is obsoleted by pmtu discovery).
506 	 *
507 	 * Note that in the modern internet, where routing is unreliable
508 	 * and broken firewalls sit in every dark corner, sending random
509 	 * errors ordered by their masters, even these two messages finally
510 	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
511 	 *
512 	 * Now we are in compliance with RFCs.
513 	 *							--ANK (980905)
514 	 */
515 
516 	inet = inet_sk(sk);
517 	if (!sock_owned_by_user(sk) && inet->recverr) {
518 		sk->sk_err = err;
519 		sk->sk_error_report(sk);
520 	} else	{ /* Only an error on timeout */
521 		sk->sk_err_soft = err;
522 	}
523 
524 out:
525 	bh_unlock_sock(sk);
526 	sock_put(sk);
527 }
528 
529 static void __tcp_v4_send_check(struct sk_buff *skb,
530 				__be32 saddr, __be32 daddr)
531 {
532 	struct tcphdr *th = tcp_hdr(skb);
533 
534 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
535 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
536 		skb->csum_start = skb_transport_header(skb) - skb->head;
537 		skb->csum_offset = offsetof(struct tcphdr, check);
538 	} else {
539 		th->check = tcp_v4_check(skb->len, saddr, daddr,
540 					 csum_partial(th,
541 						      th->doff << 2,
542 						      skb->csum));
543 	}
544 }
545 
546 /* This routine computes an IPv4 TCP checksum. */
547 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
548 {
549 	const struct inet_sock *inet = inet_sk(sk);
550 
551 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
552 }
553 EXPORT_SYMBOL(tcp_v4_send_check);
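
/*
 * Illustrative sketch, not part of the original file: what
 * tcp_v4_check()/csum_tcpudp_nofold() compute above is the standard
 * ones'-complement checksum over the 12-byte IPv4 pseudo-header plus the
 * TCP segment.  A standalone, unoptimized version (hypothetical helper
 * names, addresses in host byte order, checksum field in seg assumed
 * zeroed) could look like this.
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <stddef.h>
#include <stdint.h>

/* Fold a 32-bit ones'-complement sum down to the 16-bit checksum value. */
static uint16_t csum_fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Checksum of pseudo-header (saddr, daddr, zero, proto 6, length) + segment. */
static uint16_t tcp4_checksum(uint32_t saddr, uint32_t daddr,
			      const uint8_t *seg, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += 6;			/* zero-padded IPPROTO_TCP */
	sum += (uint32_t)len;		/* TCP header + payload length */

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)seg[i] << 8) | seg[i + 1];
	if (len & 1)
		sum += (uint32_t)seg[len - 1] << 8;	/* pad odd byte */

	return csum_fold32(sum);	/* store big-endian in th->check */
}
#endif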
554 
555 int tcp_v4_gso_send_check(struct sk_buff *skb)
556 {
557 	const struct iphdr *iph;
558 	struct tcphdr *th;
559 
560 	if (!pskb_may_pull(skb, sizeof(*th)))
561 		return -EINVAL;
562 
563 	iph = ip_hdr(skb);
564 	th = tcp_hdr(skb);
565 
566 	th->check = 0;
567 	skb->ip_summed = CHECKSUM_PARTIAL;
568 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
569 	return 0;
570 }
571 
572 /*
573  *	This routine will send an RST to the other tcp.
574  *
575  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
576  *		      for the reset?
577  *	Answer: if a packet caused the RST, it is not for a socket
578  *		existing in our system; if it is matched to a socket,
579  *		it is just a duplicate segment or a bug in the other side's TCP.
580  *		So we build the reply based only on the parameters
581  *		that arrived with the segment.
582  *	Exception: precedence violation. We do not implement it in any case.
583  */
584 
585 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
586 {
587 	const struct tcphdr *th = tcp_hdr(skb);
588 	struct {
589 		struct tcphdr th;
590 #ifdef CONFIG_TCP_MD5SIG
591 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
592 #endif
593 	} rep;
594 	struct ip_reply_arg arg;
595 #ifdef CONFIG_TCP_MD5SIG
596 	struct tcp_md5sig_key *key;
597 	const __u8 *hash_location = NULL;
598 	unsigned char newhash[16];
599 	int genhash;
600 	struct sock *sk1 = NULL;
601 #endif
602 	struct net *net;
603 
604 	/* Never send a reset in response to a reset. */
605 	if (th->rst)
606 		return;
607 
608 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
609 		return;
610 
611 	/* Swap the send and the receive. */
612 	memset(&rep, 0, sizeof(rep));
613 	rep.th.dest   = th->source;
614 	rep.th.source = th->dest;
615 	rep.th.doff   = sizeof(struct tcphdr) / 4;
616 	rep.th.rst    = 1;
617 
618 	if (th->ack) {
619 		rep.th.seq = th->ack_seq;
620 	} else {
621 		rep.th.ack = 1;
622 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
623 				       skb->len - (th->doff << 2));
624 	}
625 
626 	memset(&arg, 0, sizeof(arg));
627 	arg.iov[0].iov_base = (unsigned char *)&rep;
628 	arg.iov[0].iov_len  = sizeof(rep.th);
629 
630 #ifdef CONFIG_TCP_MD5SIG
631 	hash_location = tcp_parse_md5sig_option(th);
632 	if (!sk && hash_location) {
633 		/*
634 		 * The active side is lost. Try to find the listening socket
635 		 * through the source port, and then find the md5 key through
636 		 * the listening socket. We are not losing security here:
637 		 * the incoming packet is checked with the md5 hash of the
638 		 * found key; no RST is generated if the md5 hash doesn't match.
639 		 */
640 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
641 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
642 					     ntohs(th->source), inet_iif(skb));
643 		/* don't send an RST if we can't find the key */
644 		if (!sk1)
645 			return;
646 		rcu_read_lock();
647 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
648 					&ip_hdr(skb)->saddr, AF_INET);
649 		if (!key)
650 			goto release_sk1;
651 
652 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
653 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
654 			goto release_sk1;
655 	} else {
656 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
657 					     &ip_hdr(skb)->saddr,
658 					     AF_INET) : NULL;
659 	}
660 
661 	if (key) {
662 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
663 				   (TCPOPT_NOP << 16) |
664 				   (TCPOPT_MD5SIG << 8) |
665 				   TCPOLEN_MD5SIG);
666 		/* Update length and the length the header thinks exists */
667 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
668 		rep.th.doff = arg.iov[0].iov_len / 4;
669 
670 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
671 				     key, ip_hdr(skb)->saddr,
672 				     ip_hdr(skb)->daddr, &rep.th);
673 	}
674 #endif
675 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
676 				      ip_hdr(skb)->saddr, /* XXX */
677 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
678 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
679 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
680 	/* When the socket is gone, all binding information is lost.
681 	 * Routing might fail in this case. Use iif for oif to
682 	 * make sure we can deliver it.
683 	 */
684 	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
685 
686 	net = dev_net(skb_dst(skb)->dev);
687 	arg.tos = ip_hdr(skb)->tos;
688 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
689 		      &arg, arg.iov[0].iov_len);
690 
691 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
692 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
693 
694 #ifdef CONFIG_TCP_MD5SIG
695 release_sk1:
696 	if (sk1) {
697 		rcu_read_unlock();
698 		sock_put(sk1);
699 	}
700 #endif
701 }
702 
703 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
704    outside socket context, is certainly ugly. What can I do?
705  */
706 
707 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
708 			    u32 win, u32 ts, int oif,
709 			    struct tcp_md5sig_key *key,
710 			    int reply_flags, u8 tos)
711 {
712 	const struct tcphdr *th = tcp_hdr(skb);
713 	struct {
714 		struct tcphdr th;
715 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
716 #ifdef CONFIG_TCP_MD5SIG
717 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
718 #endif
719 			];
720 	} rep;
721 	struct ip_reply_arg arg;
722 	struct net *net = dev_net(skb_dst(skb)->dev);
723 
724 	memset(&rep.th, 0, sizeof(struct tcphdr));
725 	memset(&arg, 0, sizeof(arg));
726 
727 	arg.iov[0].iov_base = (unsigned char *)&rep;
728 	arg.iov[0].iov_len  = sizeof(rep.th);
729 	if (ts) {
730 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
731 				   (TCPOPT_TIMESTAMP << 8) |
732 				   TCPOLEN_TIMESTAMP);
733 		rep.opt[1] = htonl(tcp_time_stamp);
734 		rep.opt[2] = htonl(ts);
735 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
736 	}
737 
738 	/* Swap the send and the receive. */
739 	rep.th.dest    = th->source;
740 	rep.th.source  = th->dest;
741 	rep.th.doff    = arg.iov[0].iov_len / 4;
742 	rep.th.seq     = htonl(seq);
743 	rep.th.ack_seq = htonl(ack);
744 	rep.th.ack     = 1;
745 	rep.th.window  = htons(win);
746 
747 #ifdef CONFIG_TCP_MD5SIG
748 	if (key) {
749 		int offset = (ts) ? 3 : 0;
750 
751 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
752 					  (TCPOPT_NOP << 16) |
753 					  (TCPOPT_MD5SIG << 8) |
754 					  TCPOLEN_MD5SIG);
755 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
756 		rep.th.doff = arg.iov[0].iov_len/4;
757 
758 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
759 				    key, ip_hdr(skb)->saddr,
760 				    ip_hdr(skb)->daddr, &rep.th);
761 	}
762 #endif
763 	arg.flags = reply_flags;
764 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
765 				      ip_hdr(skb)->saddr, /* XXX */
766 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
767 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
768 	if (oif)
769 		arg.bound_dev_if = oif;
770 	arg.tos = tos;
771 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
772 		      &arg, arg.iov[0].iov_len);
773 
774 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
775 }
776 
777 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
778 {
779 	struct inet_timewait_sock *tw = inet_twsk(sk);
780 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
781 
782 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
783 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
784 			tcptw->tw_ts_recent,
785 			tw->tw_bound_dev_if,
786 			tcp_twsk_md5_key(tcptw),
787 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
788 			tw->tw_tos
789 			);
790 
791 	inet_twsk_put(tw);
792 }
793 
794 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
795 				  struct request_sock *req)
796 {
797 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
798 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
799 			req->ts_recent,
800 			0,
801 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
802 					  AF_INET),
803 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
804 			ip_hdr(skb)->tos);
805 }
806 
807 /*
808  *	Send a SYN-ACK after having received a SYN.
809  *	This still operates on a request_sock only, not on a big
810  *	socket.
811  */
812 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
813 			      struct request_sock *req,
814 			      struct request_values *rvp)
815 {
816 	const struct inet_request_sock *ireq = inet_rsk(req);
817 	struct flowi4 fl4;
818 	int err = -1;
819 	struct sk_buff *skb;
820 
821 	/* First, grab a route. */
822 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
823 		return -1;
824 
825 	skb = tcp_make_synack(sk, dst, req, rvp);
826 
827 	if (skb) {
828 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
829 
830 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
831 					    ireq->rmt_addr,
832 					    ireq->opt);
833 		err = net_xmit_eval(err);
834 	}
835 
836 	dst_release(dst);
837 	return err;
838 }
839 
840 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
841 			      struct request_values *rvp)
842 {
843 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
844 	return tcp_v4_send_synack(sk, NULL, req, rvp);
845 }
846 
847 /*
848  *	IPv4 request_sock destructor.
849  */
850 static void tcp_v4_reqsk_destructor(struct request_sock *req)
851 {
852 	kfree(inet_rsk(req)->opt);
853 }
854 
855 /*
856  * Return 1 if a syncookie should be sent
857  */
858 int tcp_syn_flood_action(struct sock *sk,
859 			 const struct sk_buff *skb,
860 			 const char *proto)
861 {
862 	const char *msg = "Dropping request";
863 	int want_cookie = 0;
864 	struct listen_sock *lopt;
865 
866 
867 
868 #ifdef CONFIG_SYN_COOKIES
869 	if (sysctl_tcp_syncookies) {
870 		msg = "Sending cookies";
871 		want_cookie = 1;
872 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
873 	} else
874 #endif
875 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
876 
877 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
878 	if (!lopt->synflood_warned) {
879 		lopt->synflood_warned = 1;
880 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
881 			proto, ntohs(tcp_hdr(skb)->dest), msg);
882 	}
883 	return want_cookie;
884 }
885 EXPORT_SYMBOL(tcp_syn_flood_action);
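
/*
 * Illustrative sketch, not part of the original file: the
 * sysctl_tcp_syncookies flag consulted above corresponds to
 * /proc/sys/net/ipv4/tcp_syncookies, so the cookie fallback can be
 * toggled from userspace roughly like this (hypothetical helper).
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <stdio.h>

static int set_tcp_syncookies(int on)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", on ? 1 : 0);
	fclose(f);
	return 0;
}
#endif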
886 
887 /*
888  * Save and compile IPv4 options into the request_sock if needed.
889  */
890 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
891 						  struct sk_buff *skb)
892 {
893 	const struct ip_options *opt = &(IPCB(skb)->opt);
894 	struct ip_options_rcu *dopt = NULL;
895 
896 	if (opt && opt->optlen) {
897 		int opt_size = sizeof(*dopt) + opt->optlen;
898 
899 		dopt = kmalloc(opt_size, GFP_ATOMIC);
900 		if (dopt) {
901 			if (ip_options_echo(&dopt->opt, skb)) {
902 				kfree(dopt);
903 				dopt = NULL;
904 			}
905 		}
906 	}
907 	return dopt;
908 }
909 
910 #ifdef CONFIG_TCP_MD5SIG
911 /*
912  * RFC2385 MD5 checksumming requires a mapping of
913  * IP address->MD5 Key.
914  * We need to maintain these in the sk structure.
915  */
916 
917 /* Find the Key structure for an address.  */
918 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
919 					 const union tcp_md5_addr *addr,
920 					 int family)
921 {
922 	struct tcp_sock *tp = tcp_sk(sk);
923 	struct tcp_md5sig_key *key;
924 	struct hlist_node *pos;
925 	unsigned int size = sizeof(struct in_addr);
926 	struct tcp_md5sig_info *md5sig;
927 
928 	/* caller either holds rcu_read_lock() or socket lock */
929 	md5sig = rcu_dereference_check(tp->md5sig_info,
930 				       sock_owned_by_user(sk) ||
931 				       lockdep_is_held(&sk->sk_lock.slock));
932 	if (!md5sig)
933 		return NULL;
934 #if IS_ENABLED(CONFIG_IPV6)
935 	if (family == AF_INET6)
936 		size = sizeof(struct in6_addr);
937 #endif
938 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
939 		if (key->family != family)
940 			continue;
941 		if (!memcmp(&key->addr, addr, size))
942 			return key;
943 	}
944 	return NULL;
945 }
946 EXPORT_SYMBOL(tcp_md5_do_lookup);
947 
948 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
949 					 struct sock *addr_sk)
950 {
951 	union tcp_md5_addr *addr;
952 
953 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
954 	return tcp_md5_do_lookup(sk, addr, AF_INET);
955 }
956 EXPORT_SYMBOL(tcp_v4_md5_lookup);
957 
958 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
959 						      struct request_sock *req)
960 {
961 	union tcp_md5_addr *addr;
962 
963 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
964 	return tcp_md5_do_lookup(sk, addr, AF_INET);
965 }
966 
967 /* This can be called on a newly created socket, from other files */
968 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
969 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
970 {
971 	/* Add Key to the list */
972 	struct tcp_md5sig_key *key;
973 	struct tcp_sock *tp = tcp_sk(sk);
974 	struct tcp_md5sig_info *md5sig;
975 
976 	key = tcp_md5_do_lookup(sk, addr, family);
977 	if (key) {
978 		/* Pre-existing entry - just update that one. */
979 		memcpy(key->key, newkey, newkeylen);
980 		key->keylen = newkeylen;
981 		return 0;
982 	}
983 
984 	md5sig = rcu_dereference_protected(tp->md5sig_info,
985 					   sock_owned_by_user(sk));
986 	if (!md5sig) {
987 		md5sig = kmalloc(sizeof(*md5sig), gfp);
988 		if (!md5sig)
989 			return -ENOMEM;
990 
991 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
992 		INIT_HLIST_HEAD(&md5sig->head);
993 		rcu_assign_pointer(tp->md5sig_info, md5sig);
994 	}
995 
996 	key = sock_kmalloc(sk, sizeof(*key), gfp);
997 	if (!key)
998 		return -ENOMEM;
999 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1000 		sock_kfree_s(sk, key, sizeof(*key));
1001 		return -ENOMEM;
1002 	}
1003 
1004 	memcpy(key->key, newkey, newkeylen);
1005 	key->keylen = newkeylen;
1006 	key->family = family;
1007 	memcpy(&key->addr, addr,
1008 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1009 				      sizeof(struct in_addr));
1010 	hlist_add_head_rcu(&key->node, &md5sig->head);
1011 	return 0;
1012 }
1013 EXPORT_SYMBOL(tcp_md5_do_add);
1014 
1015 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1016 {
1017 	struct tcp_sock *tp = tcp_sk(sk);
1018 	struct tcp_md5sig_key *key;
1019 	struct tcp_md5sig_info *md5sig;
1020 
1021 	key = tcp_md5_do_lookup(sk, addr, family);
1022 	if (!key)
1023 		return -ENOENT;
1024 	hlist_del_rcu(&key->node);
1025 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1026 	kfree_rcu(key, rcu);
1027 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1028 					   sock_owned_by_user(sk));
1029 	if (hlist_empty(&md5sig->head))
1030 		tcp_free_md5sig_pool();
1031 	return 0;
1032 }
1033 EXPORT_SYMBOL(tcp_md5_do_del);
1034 
1035 void tcp_clear_md5_list(struct sock *sk)
1036 {
1037 	struct tcp_sock *tp = tcp_sk(sk);
1038 	struct tcp_md5sig_key *key;
1039 	struct hlist_node *pos, *n;
1040 	struct tcp_md5sig_info *md5sig;
1041 
1042 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1043 
1044 	if (!hlist_empty(&md5sig->head))
1045 		tcp_free_md5sig_pool();
1046 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1047 		hlist_del_rcu(&key->node);
1048 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1049 		kfree_rcu(key, rcu);
1050 	}
1051 }
1052 
1053 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1054 				 int optlen)
1055 {
1056 	struct tcp_md5sig cmd;
1057 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1058 
1059 	if (optlen < sizeof(cmd))
1060 		return -EINVAL;
1061 
1062 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1063 		return -EFAULT;
1064 
1065 	if (sin->sin_family != AF_INET)
1066 		return -EINVAL;
1067 
1068 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1069 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1070 				      AF_INET);
1071 
1072 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1073 		return -EINVAL;
1074 
1075 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1076 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1077 			      GFP_KERNEL);
1078 }
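
/*
 * Illustrative userspace sketch (an assumption for clarity, not part of
 * the original file): a struct tcp_md5sig handed to
 * setsockopt(TCP_MD5SIG) is what arrives in tcp_v4_parse_md5_keys()
 * above; tcpm_addr names the peer the key is bound to, and passing
 * tcpm_keylen == 0 through the same option deletes the key via
 * tcp_md5_do_del().  The helper name below is hypothetical.
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <linux/tcp.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int install_tcp_md5_key(int fd, const struct sockaddr_in *peer,
			       const void *key, unsigned short keylen)
{
	struct tcp_md5sig md5;

	memset(&md5, 0, sizeof(md5));
	memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
	md5.tcpm_keylen = keylen;	/* must be <= TCP_MD5SIG_MAXKEYLEN */
	memcpy(md5.tcpm_key, key, keylen);
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}
#endif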
1079 
1080 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1081 					__be32 daddr, __be32 saddr, int nbytes)
1082 {
1083 	struct tcp4_pseudohdr *bp;
1084 	struct scatterlist sg;
1085 
1086 	bp = &hp->md5_blk.ip4;
1087 
1088 	/*
1089 	 * 1. the TCP pseudo-header (in the order: source IP address,
1090 	 * destination IP address, zero-padded protocol number, and
1091 	 * segment length)
1092 	 */
1093 	bp->saddr = saddr;
1094 	bp->daddr = daddr;
1095 	bp->pad = 0;
1096 	bp->protocol = IPPROTO_TCP;
1097 	bp->len = cpu_to_be16(nbytes);
1098 
1099 	sg_init_one(&sg, bp, sizeof(*bp));
1100 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1101 }
1102 
1103 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1104 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1105 {
1106 	struct tcp_md5sig_pool *hp;
1107 	struct hash_desc *desc;
1108 
1109 	hp = tcp_get_md5sig_pool();
1110 	if (!hp)
1111 		goto clear_hash_noput;
1112 	desc = &hp->md5_desc;
1113 
1114 	if (crypto_hash_init(desc))
1115 		goto clear_hash;
1116 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1117 		goto clear_hash;
1118 	if (tcp_md5_hash_header(hp, th))
1119 		goto clear_hash;
1120 	if (tcp_md5_hash_key(hp, key))
1121 		goto clear_hash;
1122 	if (crypto_hash_final(desc, md5_hash))
1123 		goto clear_hash;
1124 
1125 	tcp_put_md5sig_pool();
1126 	return 0;
1127 
1128 clear_hash:
1129 	tcp_put_md5sig_pool();
1130 clear_hash_noput:
1131 	memset(md5_hash, 0, 16);
1132 	return 1;
1133 }
1134 
1135 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1136 			const struct sock *sk, const struct request_sock *req,
1137 			const struct sk_buff *skb)
1138 {
1139 	struct tcp_md5sig_pool *hp;
1140 	struct hash_desc *desc;
1141 	const struct tcphdr *th = tcp_hdr(skb);
1142 	__be32 saddr, daddr;
1143 
1144 	if (sk) {
1145 		saddr = inet_sk(sk)->inet_saddr;
1146 		daddr = inet_sk(sk)->inet_daddr;
1147 	} else if (req) {
1148 		saddr = inet_rsk(req)->loc_addr;
1149 		daddr = inet_rsk(req)->rmt_addr;
1150 	} else {
1151 		const struct iphdr *iph = ip_hdr(skb);
1152 		saddr = iph->saddr;
1153 		daddr = iph->daddr;
1154 	}
1155 
1156 	hp = tcp_get_md5sig_pool();
1157 	if (!hp)
1158 		goto clear_hash_noput;
1159 	desc = &hp->md5_desc;
1160 
1161 	if (crypto_hash_init(desc))
1162 		goto clear_hash;
1163 
1164 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1165 		goto clear_hash;
1166 	if (tcp_md5_hash_header(hp, th))
1167 		goto clear_hash;
1168 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1169 		goto clear_hash;
1170 	if (tcp_md5_hash_key(hp, key))
1171 		goto clear_hash;
1172 	if (crypto_hash_final(desc, md5_hash))
1173 		goto clear_hash;
1174 
1175 	tcp_put_md5sig_pool();
1176 	return 0;
1177 
1178 clear_hash:
1179 	tcp_put_md5sig_pool();
1180 clear_hash_noput:
1181 	memset(md5_hash, 0, 16);
1182 	return 1;
1183 }
1184 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1185 
1186 static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1187 {
1188 	/*
1189 	 * This gets called for each TCP segment that arrives
1190 	 * so we want to be efficient.
1191 	 * We have 3 drop cases:
1192 	 * o No MD5 hash and one expected.
1193 	 * o MD5 hash and we're not expecting one.
1194 	 * o MD5 hash and it's wrong.
1195 	 */
1196 	const __u8 *hash_location = NULL;
1197 	struct tcp_md5sig_key *hash_expected;
1198 	const struct iphdr *iph = ip_hdr(skb);
1199 	const struct tcphdr *th = tcp_hdr(skb);
1200 	int genhash;
1201 	unsigned char newhash[16];
1202 
1203 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1204 					  AF_INET);
1205 	hash_location = tcp_parse_md5sig_option(th);
1206 
1207 	/* We've parsed the options - do we have a hash? */
1208 	if (!hash_expected && !hash_location)
1209 		return 0;
1210 
1211 	if (hash_expected && !hash_location) {
1212 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1213 		return 1;
1214 	}
1215 
1216 	if (!hash_expected && hash_location) {
1217 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1218 		return 1;
1219 	}
1220 
1221 	/* Okay, so this is hash_expected and hash_location -
1222 	 * so we need to calculate the checksum.
1223 	 */
1224 	genhash = tcp_v4_md5_hash_skb(newhash,
1225 				      hash_expected,
1226 				      NULL, NULL, skb);
1227 
1228 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1229 		if (net_ratelimit()) {
1230 			pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1231 				&iph->saddr, ntohs(th->source),
1232 				&iph->daddr, ntohs(th->dest),
1233 				genhash ? " tcp_v4_calc_md5_hash failed" : "");
1234 		}
1235 		return 1;
1236 	}
1237 	return 0;
1238 }
1239 
1240 #endif
1241 
1242 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1243 	.family		=	PF_INET,
1244 	.obj_size	=	sizeof(struct tcp_request_sock),
1245 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1246 	.send_ack	=	tcp_v4_reqsk_send_ack,
1247 	.destructor	=	tcp_v4_reqsk_destructor,
1248 	.send_reset	=	tcp_v4_send_reset,
1249 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1250 };
1251 
1252 #ifdef CONFIG_TCP_MD5SIG
1253 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1254 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1255 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1256 };
1257 #endif
1258 
1259 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1260 {
1261 	struct tcp_extend_values tmp_ext;
1262 	struct tcp_options_received tmp_opt;
1263 	const u8 *hash_location;
1264 	struct request_sock *req;
1265 	struct inet_request_sock *ireq;
1266 	struct tcp_sock *tp = tcp_sk(sk);
1267 	struct dst_entry *dst = NULL;
1268 	__be32 saddr = ip_hdr(skb)->saddr;
1269 	__be32 daddr = ip_hdr(skb)->daddr;
1270 	__u32 isn = TCP_SKB_CB(skb)->when;
1271 	int want_cookie = 0;
1272 
1273 	/* Never answer SYNs sent to broadcast or multicast */
1274 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1275 		goto drop;
1276 
1277 	/* TW buckets are converted to open requests without
1278 	 * limitations, since they conserve resources and the peer is
1279 	 * evidently a real one.
1280 	 */
1281 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1282 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1283 		if (!want_cookie)
1284 			goto drop;
1285 	}
1286 
1287 	/* Accept backlog is full. If we have already queued enough
1288 	 * warm entries in the syn queue, drop the request. It is better than
1289 	 * clogging the syn queue with openreqs with exponentially increasing
1290 	 * timeout.
1291 	 */
1292 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1293 		goto drop;
1294 
1295 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1296 	if (!req)
1297 		goto drop;
1298 
1299 #ifdef CONFIG_TCP_MD5SIG
1300 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1301 #endif
1302 
1303 	tcp_clear_options(&tmp_opt);
1304 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1305 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1306 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1307 
1308 	if (tmp_opt.cookie_plus > 0 &&
1309 	    tmp_opt.saw_tstamp &&
1310 	    !tp->rx_opt.cookie_out_never &&
1311 	    (sysctl_tcp_cookie_size > 0 ||
1312 	     (tp->cookie_values != NULL &&
1313 	      tp->cookie_values->cookie_desired > 0))) {
1314 		u8 *c;
1315 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1316 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1317 
1318 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1319 			goto drop_and_release;
1320 
1321 		/* Secret recipe starts with IP addresses */
1322 		*mess++ ^= (__force u32)daddr;
1323 		*mess++ ^= (__force u32)saddr;
1324 
1325 		/* plus variable length Initiator Cookie */
1326 		c = (u8 *)mess;
1327 		while (l-- > 0)
1328 			*c++ ^= *hash_location++;
1329 
1330 		want_cookie = 0;	/* not our kind of cookie */
1331 		tmp_ext.cookie_out_never = 0; /* false */
1332 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1333 	} else if (!tp->rx_opt.cookie_in_always) {
1334 		/* redundant indications, but ensure initialization. */
1335 		tmp_ext.cookie_out_never = 1; /* true */
1336 		tmp_ext.cookie_plus = 0;
1337 	} else {
1338 		goto drop_and_release;
1339 	}
1340 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1341 
1342 	if (want_cookie && !tmp_opt.saw_tstamp)
1343 		tcp_clear_options(&tmp_opt);
1344 
1345 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1346 	tcp_openreq_init(req, &tmp_opt, skb);
1347 
1348 	ireq = inet_rsk(req);
1349 	ireq->loc_addr = daddr;
1350 	ireq->rmt_addr = saddr;
1351 	ireq->no_srccheck = inet_sk(sk)->transparent;
1352 	ireq->opt = tcp_v4_save_options(sk, skb);
1353 
1354 	if (security_inet_conn_request(sk, skb, req))
1355 		goto drop_and_free;
1356 
1357 	if (!want_cookie || tmp_opt.tstamp_ok)
1358 		TCP_ECN_create_request(req, tcp_hdr(skb));
1359 
1360 	if (want_cookie) {
1361 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1362 		req->cookie_ts = tmp_opt.tstamp_ok;
1363 	} else if (!isn) {
1364 		struct inet_peer *peer = NULL;
1365 		struct flowi4 fl4;
1366 
1367 		/* VJ's idea. We save the last timestamp seen
1368 		 * from the destination in the peer table when entering
1369 		 * TIME-WAIT state, and check against it before
1370 		 * accepting a new connection request.
1371 		 *
1372 		 * If "isn" is not zero, this request hit an alive
1373 		 * timewait bucket, so all the necessary checks
1374 		 * are made in the function processing the timewait state.
1375 		 */
1376 		if (tmp_opt.saw_tstamp &&
1377 		    tcp_death_row.sysctl_tw_recycle &&
1378 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1379 		    fl4.daddr == saddr &&
1380 		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1381 			inet_peer_refcheck(peer);
1382 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1383 			    (s32)(peer->tcp_ts - req->ts_recent) >
1384 							TCP_PAWS_WINDOW) {
1385 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1386 				goto drop_and_release;
1387 			}
1388 		}
1389 		/* Kill the following clause, if you dislike this way. */
1390 		else if (!sysctl_tcp_syncookies &&
1391 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1392 			  (sysctl_max_syn_backlog >> 2)) &&
1393 			 (!peer || !peer->tcp_ts_stamp) &&
1394 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1395 			/* Without syncookies the last quarter of the
1396 			 * backlog is filled with destinations
1397 			 * proven to be alive.
1398 			 * It means that we continue to communicate
1399 			 * with destinations already remembered
1400 			 * at the moment of the synflood.
1401 			 */
1402 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1403 				       &saddr, ntohs(tcp_hdr(skb)->source));
1404 			goto drop_and_release;
1405 		}
1406 
1407 		isn = tcp_v4_init_sequence(skb);
1408 	}
1409 	tcp_rsk(req)->snt_isn = isn;
1410 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
1411 
1412 	if (tcp_v4_send_synack(sk, dst, req,
1413 			       (struct request_values *)&tmp_ext) ||
1414 	    want_cookie)
1415 		goto drop_and_free;
1416 
1417 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1418 	return 0;
1419 
1420 drop_and_release:
1421 	dst_release(dst);
1422 drop_and_free:
1423 	reqsk_free(req);
1424 drop:
1425 	return 0;
1426 }
1427 EXPORT_SYMBOL(tcp_v4_conn_request);
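
/*
 * Illustrative userspace counterpart (an assumption added for clarity,
 * not part of the original file): the SYNs handled by
 * tcp_v4_conn_request() above arrive on a listening socket set up
 * roughly like this; a later accept() returns the child created by
 * tcp_v4_syn_recv_sock() below.  The helper name is hypothetical.
 */
#if 0	/* userspace-only illustration, never compiled here */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int tcp_listen_any(unsigned short port, int backlog)
{
	struct sockaddr_in sin;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(port);
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    listen(fd, backlog) < 0) {
		close(fd);
		return -1;
	}
	return fd;	/* accept() on this fd completes the 3-way handshake */
}
#endif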
1428 
1429 
1430 /*
1431  * The three way handshake has completed - we got a valid synack -
1432  * now create the new socket.
1433  */
1434 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1435 				  struct request_sock *req,
1436 				  struct dst_entry *dst)
1437 {
1438 	struct inet_request_sock *ireq;
1439 	struct inet_sock *newinet;
1440 	struct tcp_sock *newtp;
1441 	struct sock *newsk;
1442 #ifdef CONFIG_TCP_MD5SIG
1443 	struct tcp_md5sig_key *key;
1444 #endif
1445 	struct ip_options_rcu *inet_opt;
1446 
1447 	if (sk_acceptq_is_full(sk))
1448 		goto exit_overflow;
1449 
1450 	newsk = tcp_create_openreq_child(sk, req, skb);
1451 	if (!newsk)
1452 		goto exit_nonewsk;
1453 
1454 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1455 
1456 	newtp		      = tcp_sk(newsk);
1457 	newinet		      = inet_sk(newsk);
1458 	ireq		      = inet_rsk(req);
1459 	newinet->inet_daddr   = ireq->rmt_addr;
1460 	newinet->inet_rcv_saddr = ireq->loc_addr;
1461 	newinet->inet_saddr	      = ireq->loc_addr;
1462 	inet_opt	      = ireq->opt;
1463 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1464 	ireq->opt	      = NULL;
1465 	newinet->mc_index     = inet_iif(skb);
1466 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1467 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1468 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1469 	if (inet_opt)
1470 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1471 	newinet->inet_id = newtp->write_seq ^ jiffies;
1472 
1473 	if (!dst) {
1474 		dst = inet_csk_route_child_sock(sk, newsk, req);
1475 		if (!dst)
1476 			goto put_and_exit;
1477 	} else {
1478 		/* syncookie case : see end of cookie_v4_check() */
1479 	}
1480 	sk_setup_caps(newsk, dst);
1481 
1482 	tcp_mtup_init(newsk);
1483 	tcp_sync_mss(newsk, dst_mtu(dst));
1484 	newtp->advmss = dst_metric_advmss(dst);
1485 	if (tcp_sk(sk)->rx_opt.user_mss &&
1486 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1487 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1488 
1489 	tcp_initialize_rcv_mss(newsk);
1490 	if (tcp_rsk(req)->snt_synack)
1491 		tcp_valid_rtt_meas(newsk,
1492 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1493 	newtp->total_retrans = req->retrans;
1494 
1495 #ifdef CONFIG_TCP_MD5SIG
1496 	/* Copy over the MD5 key from the original socket */
1497 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1498 				AF_INET);
1499 	if (key != NULL) {
1500 		/*
1501 		 * We're using one, so create a matching key
1502 		 * on the newsk structure. If we fail to get
1503 		 * memory, then we end up not copying the key
1504 		 * across. Shucks.
1505 		 */
1506 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1507 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1508 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1509 	}
1510 #endif
1511 
1512 	if (__inet_inherit_port(sk, newsk) < 0)
1513 		goto put_and_exit;
1514 	__inet_hash_nolisten(newsk, NULL);
1515 
1516 	return newsk;
1517 
1518 exit_overflow:
1519 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1520 exit_nonewsk:
1521 	dst_release(dst);
1522 exit:
1523 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1524 	return NULL;
1525 put_and_exit:
1526 	tcp_clear_xmit_timers(newsk);
1527 	tcp_cleanup_congestion_control(newsk);
1528 	bh_unlock_sock(newsk);
1529 	sock_put(newsk);
1530 	goto exit;
1531 }
1532 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1533 
1534 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1535 {
1536 	struct tcphdr *th = tcp_hdr(skb);
1537 	const struct iphdr *iph = ip_hdr(skb);
1538 	struct sock *nsk;
1539 	struct request_sock **prev;
1540 	/* Find possible connection requests. */
1541 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1542 						       iph->saddr, iph->daddr);
1543 	if (req)
1544 		return tcp_check_req(sk, skb, req, prev);
1545 
1546 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1547 			th->source, iph->daddr, th->dest, inet_iif(skb));
1548 
1549 	if (nsk) {
1550 		if (nsk->sk_state != TCP_TIME_WAIT) {
1551 			bh_lock_sock(nsk);
1552 			return nsk;
1553 		}
1554 		inet_twsk_put(inet_twsk(nsk));
1555 		return NULL;
1556 	}
1557 
1558 #ifdef CONFIG_SYN_COOKIES
1559 	if (!th->syn)
1560 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1561 #endif
1562 	return sk;
1563 }
1564 
1565 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1566 {
1567 	const struct iphdr *iph = ip_hdr(skb);
1568 
1569 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1570 		if (!tcp_v4_check(skb->len, iph->saddr,
1571 				  iph->daddr, skb->csum)) {
1572 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1573 			return 0;
1574 		}
1575 	}
1576 
1577 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1578 				       skb->len, IPPROTO_TCP, 0);
1579 
1580 	if (skb->len <= 76) {
1581 		return __skb_checksum_complete(skb);
1582 	}
1583 	return 0;
1584 }
1585 
1586 
1587 /* The socket must have its spinlock held when we get
1588  * here.
1589  *
1590  * We have a potential double-lock case here, so even when
1591  * doing backlog processing we use the BH locking scheme.
1592  * This is because we cannot sleep with the original spinlock
1593  * held.
1594  */
1595 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1596 {
1597 	struct sock *rsk;
1598 #ifdef CONFIG_TCP_MD5SIG
1599 	/*
1600 	 * We really want to reject the packet as early as possible
1601 	 * if:
1602 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1603 	 *  o There is an MD5 option and we're not expecting one
1604 	 */
1605 	if (tcp_v4_inbound_md5_hash(sk, skb))
1606 		goto discard;
1607 #endif
1608 
1609 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1610 		sock_rps_save_rxhash(sk, skb);
1611 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1612 			rsk = sk;
1613 			goto reset;
1614 		}
1615 		return 0;
1616 	}
1617 
1618 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1619 		goto csum_err;
1620 
1621 	if (sk->sk_state == TCP_LISTEN) {
1622 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1623 		if (!nsk)
1624 			goto discard;
1625 
1626 		if (nsk != sk) {
1627 			sock_rps_save_rxhash(nsk, skb);
1628 			if (tcp_child_process(sk, nsk, skb)) {
1629 				rsk = nsk;
1630 				goto reset;
1631 			}
1632 			return 0;
1633 		}
1634 	} else
1635 		sock_rps_save_rxhash(sk, skb);
1636 
1637 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1638 		rsk = sk;
1639 		goto reset;
1640 	}
1641 	return 0;
1642 
1643 reset:
1644 	tcp_v4_send_reset(rsk, skb);
1645 discard:
1646 	kfree_skb(skb);
1647 	/* Be careful here. If this function gets more complicated and
1648 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1649 	 * might be destroyed here. This current version compiles correctly,
1650 	 * but you have been warned.
1651 	 */
1652 	return 0;
1653 
1654 csum_err:
1655 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1656 	goto discard;
1657 }
1658 EXPORT_SYMBOL(tcp_v4_do_rcv);
1659 
1660 /*
1661  *	From tcp_input.c
1662  */
1663 
1664 int tcp_v4_rcv(struct sk_buff *skb)
1665 {
1666 	const struct iphdr *iph;
1667 	const struct tcphdr *th;
1668 	struct sock *sk;
1669 	int ret;
1670 	struct net *net = dev_net(skb->dev);
1671 
1672 	if (skb->pkt_type != PACKET_HOST)
1673 		goto discard_it;
1674 
1675 	/* Count it even if it's bad */
1676 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1677 
1678 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1679 		goto discard_it;
1680 
1681 	th = tcp_hdr(skb);
1682 
1683 	if (th->doff < sizeof(struct tcphdr) / 4)
1684 		goto bad_packet;
1685 	if (!pskb_may_pull(skb, th->doff * 4))
1686 		goto discard_it;
1687 
1688 	/* An explanation is required here, I think.
1689 	 * Packet length and doff are validated by header prediction,
1690 	 * provided the case of th->doff==0 is eliminated.
1691 	 * So, we defer the checks. */
1692 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1693 		goto bad_packet;
1694 
1695 	th = tcp_hdr(skb);
1696 	iph = ip_hdr(skb);
1697 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1698 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1699 				    skb->len - th->doff * 4);
1700 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1701 	TCP_SKB_CB(skb)->when	 = 0;
1702 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1703 	TCP_SKB_CB(skb)->sacked	 = 0;
1704 
1705 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1706 	if (!sk)
1707 		goto no_tcp_socket;
1708 
1709 process:
1710 	if (sk->sk_state == TCP_TIME_WAIT)
1711 		goto do_time_wait;
1712 
1713 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1714 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1715 		goto discard_and_relse;
1716 	}
1717 
1718 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1719 		goto discard_and_relse;
1720 	nf_reset(skb);
1721 
1722 	if (sk_filter(sk, skb))
1723 		goto discard_and_relse;
1724 
1725 	skb->dev = NULL;
1726 
1727 	bh_lock_sock_nested(sk);
1728 	ret = 0;
1729 	if (!sock_owned_by_user(sk)) {
1730 #ifdef CONFIG_NET_DMA
1731 		struct tcp_sock *tp = tcp_sk(sk);
1732 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1733 			tp->ucopy.dma_chan = net_dma_find_channel();
1734 		if (tp->ucopy.dma_chan)
1735 			ret = tcp_v4_do_rcv(sk, skb);
1736 		else
1737 #endif
1738 		{
1739 			if (!tcp_prequeue(sk, skb))
1740 				ret = tcp_v4_do_rcv(sk, skb);
1741 		}
1742 	} else if (unlikely(sk_add_backlog(sk, skb))) {
1743 		bh_unlock_sock(sk);
1744 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1745 		goto discard_and_relse;
1746 	}
1747 	bh_unlock_sock(sk);
1748 
1749 	sock_put(sk);
1750 
1751 	return ret;
1752 
1753 no_tcp_socket:
1754 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1755 		goto discard_it;
1756 
1757 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1758 bad_packet:
1759 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1760 	} else {
1761 		tcp_v4_send_reset(NULL, skb);
1762 	}
1763 
1764 discard_it:
1765 	/* Discard frame. */
1766 	kfree_skb(skb);
1767 	return 0;
1768 
1769 discard_and_relse:
1770 	sock_put(sk);
1771 	goto discard_it;
1772 
1773 do_time_wait:
1774 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1775 		inet_twsk_put(inet_twsk(sk));
1776 		goto discard_it;
1777 	}
1778 
1779 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1780 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1781 		inet_twsk_put(inet_twsk(sk));
1782 		goto discard_it;
1783 	}
1784 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1785 	case TCP_TW_SYN: {
1786 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1787 							&tcp_hashinfo,
1788 							iph->daddr, th->dest,
1789 							inet_iif(skb));
1790 		if (sk2) {
1791 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1792 			inet_twsk_put(inet_twsk(sk));
1793 			sk = sk2;
1794 			goto process;
1795 		}
1796 		/* Fall through to ACK */
1797 	}
1798 	case TCP_TW_ACK:
1799 		tcp_v4_timewait_ack(sk, skb);
1800 		break;
1801 	case TCP_TW_RST:
1802 		goto no_tcp_socket;
1803 	case TCP_TW_SUCCESS:;
1804 	}
1805 	goto discard_it;
1806 }
1807 
1808 struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1809 {
1810 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1811 	struct inet_sock *inet = inet_sk(sk);
1812 	struct inet_peer *peer;
1813 
1814 	if (!rt ||
1815 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1816 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
1817 		*release_it = true;
1818 	} else {
1819 		if (!rt->peer)
1820 			rt_bind_peer(rt, inet->inet_daddr, 1);
1821 		peer = rt->peer;
1822 		*release_it = false;
1823 	}
1824 
1825 	return peer;
1826 }
1827 EXPORT_SYMBOL(tcp_v4_get_peer);
1828 
1829 void *tcp_v4_tw_get_peer(struct sock *sk)
1830 {
1831 	const struct inet_timewait_sock *tw = inet_twsk(sk);
1832 
1833 	return inet_getpeer_v4(tw->tw_daddr, 1);
1834 }
1835 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1836 
1837 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1838 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1839 	.twsk_unique	= tcp_twsk_unique,
1840 	.twsk_destructor= tcp_twsk_destructor,
1841 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1842 };
1843 
1844 const struct inet_connection_sock_af_ops ipv4_specific = {
1845 	.queue_xmit	   = ip_queue_xmit,
1846 	.send_check	   = tcp_v4_send_check,
1847 	.rebuild_header	   = inet_sk_rebuild_header,
1848 	.conn_request	   = tcp_v4_conn_request,
1849 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1850 	.get_peer	   = tcp_v4_get_peer,
1851 	.net_header_len	   = sizeof(struct iphdr),
1852 	.setsockopt	   = ip_setsockopt,
1853 	.getsockopt	   = ip_getsockopt,
1854 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1855 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1856 	.bind_conflict	   = inet_csk_bind_conflict,
1857 #ifdef CONFIG_COMPAT
1858 	.compat_setsockopt = compat_ip_setsockopt,
1859 	.compat_getsockopt = compat_ip_getsockopt,
1860 #endif
1861 };
1862 EXPORT_SYMBOL(ipv4_specific);
1863 
1864 #ifdef CONFIG_TCP_MD5SIG
1865 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1866 	.md5_lookup		= tcp_v4_md5_lookup,
1867 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1868 	.md5_parse		= tcp_v4_parse_md5_keys,
1869 };
1870 #endif
1871 
1872 /* NOTE: A lot of things are set to zero explicitly by the call to
1873  *       sk_alloc(), so they need not be done here.
1874  */
1875 static int tcp_v4_init_sock(struct sock *sk)
1876 {
1877 	struct inet_connection_sock *icsk = inet_csk(sk);
1878 	struct tcp_sock *tp = tcp_sk(sk);
1879 
1880 	skb_queue_head_init(&tp->out_of_order_queue);
1881 	tcp_init_xmit_timers(sk);
1882 	tcp_prequeue_init(tp);
1883 
1884 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1885 	tp->mdev = TCP_TIMEOUT_INIT;
1886 
1887 	/* So many TCP implementations out there (incorrectly) count the
1888 	 * initial SYN frame in their delayed-ACK and congestion control
1889 	 * algorithms that we must have the following bandaid to talk
1890 	 * efficiently to them.  -DaveM
1891 	 */
1892 	tp->snd_cwnd = TCP_INIT_CWND;
1893 
1894 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1895 	 * initialization of these values.
1896 	 */
1897 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1898 	tp->snd_cwnd_clamp = ~0;
1899 	tp->mss_cache = TCP_MSS_DEFAULT;
1900 
1901 	tp->reordering = sysctl_tcp_reordering;
1902 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1903 
1904 	sk->sk_state = TCP_CLOSE;
1905 
1906 	sk->sk_write_space = sk_stream_write_space;
1907 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1908 
1909 	icsk->icsk_af_ops = &ipv4_specific;
1910 	icsk->icsk_sync_mss = tcp_sync_mss;
1911 #ifdef CONFIG_TCP_MD5SIG
1912 	tp->af_specific = &tcp_sock_ipv4_specific;
1913 #endif
1914 
1915 	/* TCP Cookie Transactions */
1916 	if (sysctl_tcp_cookie_size > 0) {
1917 		/* Default, cookies without s_data_payload. */
1918 		tp->cookie_values =
1919 			kzalloc(sizeof(*tp->cookie_values),
1920 				sk->sk_allocation);
1921 		if (tp->cookie_values != NULL)
1922 			kref_init(&tp->cookie_values->kref);
1923 	}
1924 	/* Presumed zeroed, in order of appearance:
1925 	 *	cookie_in_always, cookie_out_never,
1926 	 *	s_data_constant, s_data_in, s_data_out
1927 	 */
1928 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1929 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1930 
1931 	local_bh_disable();
1932 	sock_update_memcg(sk);
1933 	sk_sockets_allocated_inc(sk);
1934 	local_bh_enable();
1935 
1936 	return 0;
1937 }
1938 
1939 void tcp_v4_destroy_sock(struct sock *sk)
1940 {
1941 	struct tcp_sock *tp = tcp_sk(sk);
1942 
1943 	tcp_clear_xmit_timers(sk);
1944 
1945 	tcp_cleanup_congestion_control(sk);
1946 
1947 	/* Clean up the write buffer. */
1948 	tcp_write_queue_purge(sk);
1949 
1950 	/* Clean up our (hopefully empty) out_of_order_queue. */
1951 	__skb_queue_purge(&tp->out_of_order_queue);
1952 
1953 #ifdef CONFIG_TCP_MD5SIG
1954 	/* Clean up the MD5 key list, if any */
1955 	if (tp->md5sig_info) {
1956 		tcp_clear_md5_list(sk);
1957 		kfree_rcu(tp->md5sig_info, rcu);
1958 		tp->md5sig_info = NULL;
1959 	}
1960 #endif
1961 
1962 #ifdef CONFIG_NET_DMA
1963 	/* Cleans up our sk_async_wait_queue */
1964 	__skb_queue_purge(&sk->sk_async_wait_queue);
1965 #endif
1966 
1967 	/* Clean the prequeue; it really should be empty already. */
1968 	__skb_queue_purge(&tp->ucopy.prequeue);
1969 
1970 	/* Clean up a referenced TCP bind bucket. */
1971 	if (inet_csk(sk)->icsk_bind_hash)
1972 		inet_put_port(sk);
1973 
1974 	/*
1975 	 * If a cached sendmsg page exists, toss it.
1976 	 */
1977 	if (sk->sk_sndmsg_page) {
1978 		__free_page(sk->sk_sndmsg_page);
1979 		sk->sk_sndmsg_page = NULL;
1980 	}
1981 
1982 	/* TCP Cookie Transactions */
1983 	if (tp->cookie_values != NULL) {
1984 		kref_put(&tp->cookie_values->kref,
1985 			 tcp_cookie_values_release);
1986 		tp->cookie_values = NULL;
1987 	}
1988 
1989 	sk_sockets_allocated_dec(sk);
1990 	sock_release_memcg(sk);
1991 }
1992 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1993 
1994 #ifdef CONFIG_PROC_FS
1995 /* Proc filesystem TCP sock list dumping. */
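/*
 * The iterator walks the listening hash first and then the established hash;
 * time-wait sockets hang off each established bucket's twchain.  struct
 * tcp_iter_state records the position: st->state selects the current phase
 * (LISTENING, OPENREQ for a listener's SYN queue, ESTABLISHED or TIME_WAIT),
 * st->bucket the hash bucket, st->offset the position within that bucket and
 * st->num the running entry count shown in the "sl" column.
 */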
1996 
1997 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1998 {
1999 	return hlist_nulls_empty(head) ? NULL :
2000 		list_entry(head->first, struct inet_timewait_sock, tw_node);
2001 }
2002 
2003 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2004 {
2005 	return !is_a_nulls(tw->tw_node.next) ?
2006 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2007 }
2008 
2009 /*
2010  * Get the next listener socket following cur.  If cur is NULL, get the
2011  * first socket starting from the bucket given in st->bucket; when
2012  * st->bucket is zero the very first socket in the hash table is returned.
2013  */
2014 static void *listening_get_next(struct seq_file *seq, void *cur)
2015 {
2016 	struct inet_connection_sock *icsk;
2017 	struct hlist_nulls_node *node;
2018 	struct sock *sk = cur;
2019 	struct inet_listen_hashbucket *ilb;
2020 	struct tcp_iter_state *st = seq->private;
2021 	struct net *net = seq_file_net(seq);
2022 
2023 	if (!sk) {
2024 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2025 		spin_lock_bh(&ilb->lock);
2026 		sk = sk_nulls_head(&ilb->head);
2027 		st->offset = 0;
2028 		goto get_sk;
2029 	}
2030 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
2031 	++st->num;
2032 	++st->offset;
2033 
2034 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2035 		struct request_sock *req = cur;
2036 
2037 		icsk = inet_csk(st->syn_wait_sk);
2038 		req = req->dl_next;
2039 		while (1) {
2040 			while (req) {
2041 				if (req->rsk_ops->family == st->family) {
2042 					cur = req;
2043 					goto out;
2044 				}
2045 				req = req->dl_next;
2046 			}
2047 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2048 				break;
2049 get_req:
2050 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2051 		}
2052 		sk	  = sk_nulls_next(st->syn_wait_sk);
2053 		st->state = TCP_SEQ_STATE_LISTENING;
2054 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2055 	} else {
2056 		icsk = inet_csk(sk);
2057 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2058 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2059 			goto start_req;
2060 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2061 		sk = sk_nulls_next(sk);
2062 	}
2063 get_sk:
2064 	sk_nulls_for_each_from(sk, node) {
2065 		if (!net_eq(sock_net(sk), net))
2066 			continue;
2067 		if (sk->sk_family == st->family) {
2068 			cur = sk;
2069 			goto out;
2070 		}
2071 		icsk = inet_csk(sk);
2072 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2073 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2074 start_req:
2075 			st->uid		= sock_i_uid(sk);
2076 			st->syn_wait_sk = sk;
2077 			st->state	= TCP_SEQ_STATE_OPENREQ;
2078 			st->sbucket	= 0;
2079 			goto get_req;
2080 		}
2081 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2082 	}
2083 	spin_unlock_bh(&ilb->lock);
2084 	st->offset = 0;
2085 	if (++st->bucket < INET_LHTABLE_SIZE) {
2086 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2087 		spin_lock_bh(&ilb->lock);
2088 		sk = sk_nulls_head(&ilb->head);
2089 		goto get_sk;
2090 	}
2091 	cur = NULL;
2092 out:
2093 	return cur;
2094 }
2095 
2096 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2097 {
2098 	struct tcp_iter_state *st = seq->private;
2099 	void *rc;
2100 
2101 	st->bucket = 0;
2102 	st->offset = 0;
2103 	rc = listening_get_next(seq, NULL);
2104 
2105 	while (rc && *pos) {
2106 		rc = listening_get_next(seq, rc);
2107 		--*pos;
2108 	}
2109 	return rc;
2110 }
2111 
2112 static inline int empty_bucket(struct tcp_iter_state *st)
2113 {
2114 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2115 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2116 }
2117 
2118 /*
2119  * Get the first established socket, starting from the bucket given in st->bucket.
2120  * If st->bucket is zero, the very first socket in the hash is returned.
2121  */
2122 static void *established_get_first(struct seq_file *seq)
2123 {
2124 	struct tcp_iter_state *st = seq->private;
2125 	struct net *net = seq_file_net(seq);
2126 	void *rc = NULL;
2127 
2128 	st->offset = 0;
2129 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2130 		struct sock *sk;
2131 		struct hlist_nulls_node *node;
2132 		struct inet_timewait_sock *tw;
2133 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2134 
2135 		/* Lockless fast path for the common case of empty buckets */
2136 		if (empty_bucket(st))
2137 			continue;
2138 
2139 		spin_lock_bh(lock);
2140 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2141 			if (sk->sk_family != st->family ||
2142 			    !net_eq(sock_net(sk), net)) {
2143 				continue;
2144 			}
2145 			rc = sk;
2146 			goto out;
2147 		}
2148 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2149 		inet_twsk_for_each(tw, node,
2150 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2151 			if (tw->tw_family != st->family ||
2152 			    !net_eq(twsk_net(tw), net)) {
2153 				continue;
2154 			}
2155 			rc = tw;
2156 			goto out;
2157 		}
2158 		spin_unlock_bh(lock);
2159 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2160 	}
2161 out:
2162 	return rc;
2163 }
2164 
2165 static void *established_get_next(struct seq_file *seq, void *cur)
2166 {
2167 	struct sock *sk = cur;
2168 	struct inet_timewait_sock *tw;
2169 	struct hlist_nulls_node *node;
2170 	struct tcp_iter_state *st = seq->private;
2171 	struct net *net = seq_file_net(seq);
2172 
2173 	++st->num;
2174 	++st->offset;
2175 
2176 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2177 		tw = cur;
2178 		tw = tw_next(tw);
2179 get_tw:
2180 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2181 			tw = tw_next(tw);
2182 		}
2183 		if (tw) {
2184 			cur = tw;
2185 			goto out;
2186 		}
2187 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2188 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2189 
2190 		/* Look for the next non-empty bucket */
2191 		st->offset = 0;
2192 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2193 				empty_bucket(st))
2194 			;
2195 		if (st->bucket > tcp_hashinfo.ehash_mask)
2196 			return NULL;
2197 
2198 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2199 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2200 	} else
2201 		sk = sk_nulls_next(sk);
2202 
2203 	sk_nulls_for_each_from(sk, node) {
2204 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2205 			goto found;
2206 	}
2207 
2208 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2209 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2210 	goto get_tw;
2211 found:
2212 	cur = sk;
2213 out:
2214 	return cur;
2215 }
2216 
2217 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2218 {
2219 	struct tcp_iter_state *st = seq->private;
2220 	void *rc;
2221 
2222 	st->bucket = 0;
2223 	rc = established_get_first(seq);
2224 
2225 	while (rc && pos) {
2226 		rc = established_get_next(seq, rc);
2227 		--pos;
2228 	}
2229 	return rc;
2230 }
2231 
2232 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2233 {
2234 	void *rc;
2235 	struct tcp_iter_state *st = seq->private;
2236 
2237 	st->state = TCP_SEQ_STATE_LISTENING;
2238 	rc	  = listening_get_idx(seq, &pos);
2239 
2240 	if (!rc) {
2241 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2242 		rc	  = established_get_idx(seq, pos);
2243 	}
2244 
2245 	return rc;
2246 }
2247 
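/*
 * When a read on the seq file resumes at the position we stopped at last
 * time, restart the walk from the remembered bucket and offset instead of
 * rescanning the whole table from the beginning.
 */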
2248 static void *tcp_seek_last_pos(struct seq_file *seq)
2249 {
2250 	struct tcp_iter_state *st = seq->private;
2251 	int offset = st->offset;
2252 	int orig_num = st->num;
2253 	void *rc = NULL;
2254 
2255 	switch (st->state) {
2256 	case TCP_SEQ_STATE_OPENREQ:
2257 	case TCP_SEQ_STATE_LISTENING:
2258 		if (st->bucket >= INET_LHTABLE_SIZE)
2259 			break;
2260 		st->state = TCP_SEQ_STATE_LISTENING;
2261 		rc = listening_get_next(seq, NULL);
2262 		while (offset-- && rc)
2263 			rc = listening_get_next(seq, rc);
2264 		if (rc)
2265 			break;
2266 		st->bucket = 0;
2267 		/* Fallthrough */
2268 	case TCP_SEQ_STATE_ESTABLISHED:
2269 	case TCP_SEQ_STATE_TIME_WAIT:
2270 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2271 		if (st->bucket > tcp_hashinfo.ehash_mask)
2272 			break;
2273 		rc = established_get_first(seq);
2274 		while (offset-- && rc)
2275 			rc = established_get_next(seq, rc);
2276 	}
2277 
2278 	st->num = orig_num;
2279 
2280 	return rc;
2281 }
2282 
2283 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2284 {
2285 	struct tcp_iter_state *st = seq->private;
2286 	void *rc;
2287 
2288 	if (*pos && *pos == st->last_pos) {
2289 		rc = tcp_seek_last_pos(seq);
2290 		if (rc)
2291 			goto out;
2292 	}
2293 
2294 	st->state = TCP_SEQ_STATE_LISTENING;
2295 	st->num = 0;
2296 	st->bucket = 0;
2297 	st->offset = 0;
2298 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2299 
2300 out:
2301 	st->last_pos = *pos;
2302 	return rc;
2303 }
2304 
2305 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2306 {
2307 	struct tcp_iter_state *st = seq->private;
2308 	void *rc = NULL;
2309 
2310 	if (v == SEQ_START_TOKEN) {
2311 		rc = tcp_get_idx(seq, 0);
2312 		goto out;
2313 	}
2314 
2315 	switch (st->state) {
2316 	case TCP_SEQ_STATE_OPENREQ:
2317 	case TCP_SEQ_STATE_LISTENING:
2318 		rc = listening_get_next(seq, v);
2319 		if (!rc) {
2320 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2321 			st->bucket = 0;
2322 			st->offset = 0;
2323 			rc	  = established_get_first(seq);
2324 		}
2325 		break;
2326 	case TCP_SEQ_STATE_ESTABLISHED:
2327 	case TCP_SEQ_STATE_TIME_WAIT:
2328 		rc = established_get_next(seq, v);
2329 		break;
2330 	}
2331 out:
2332 	++*pos;
2333 	st->last_pos = *pos;
2334 	return rc;
2335 }
2336 
2337 static void tcp_seq_stop(struct seq_file *seq, void *v)
2338 {
2339 	struct tcp_iter_state *st = seq->private;
2340 
2341 	switch (st->state) {
2342 	case TCP_SEQ_STATE_OPENREQ:
2343 		if (v) {
2344 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2345 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2346 		}
2347 	case TCP_SEQ_STATE_LISTENING:
2348 		if (v != SEQ_START_TOKEN)
2349 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2350 		break;
2351 	case TCP_SEQ_STATE_TIME_WAIT:
2352 	case TCP_SEQ_STATE_ESTABLISHED:
2353 		if (v)
2354 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2355 		break;
2356 	}
2357 }
2358 
2359 int tcp_seq_open(struct inode *inode, struct file *file)
2360 {
2361 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2362 	struct tcp_iter_state *s;
2363 	int err;
2364 
2365 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2366 			  sizeof(struct tcp_iter_state));
2367 	if (err < 0)
2368 		return err;
2369 
2370 	s = ((struct seq_file *)file->private_data)->private;
2371 	s->family		= afinfo->family;
2372 	s->last_pos 		= 0;
2373 	return 0;
2374 }
2375 EXPORT_SYMBOL(tcp_seq_open);
2376 
2377 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2378 {
2379 	int rc = 0;
2380 	struct proc_dir_entry *p;
2381 
2382 	afinfo->seq_ops.start		= tcp_seq_start;
2383 	afinfo->seq_ops.next		= tcp_seq_next;
2384 	afinfo->seq_ops.stop		= tcp_seq_stop;
2385 
2386 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2387 			     afinfo->seq_fops, afinfo);
2388 	if (!p)
2389 		rc = -ENOMEM;
2390 	return rc;
2391 }
2392 EXPORT_SYMBOL(tcp_proc_register);
2393 
2394 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2395 {
2396 	proc_net_remove(net, afinfo->name);
2397 }
2398 EXPORT_SYMBOL(tcp_proc_unregister);
2399 
2400 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2401 			 struct seq_file *f, int i, int uid, int *len)
2402 {
2403 	const struct inet_request_sock *ireq = inet_rsk(req);
2404 	int ttd = req->expires - jiffies;
2405 
2406 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2407 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2408 		i,
2409 		ireq->loc_addr,
2410 		ntohs(inet_sk(sk)->inet_sport),
2411 		ireq->rmt_addr,
2412 		ntohs(ireq->rmt_port),
2413 		TCP_SYN_RECV,
2414 		0, 0, /* could print option size, but that is af dependent. */
2415 		1,    /* timers active (only the expire timer) */
2416 		jiffies_to_clock_t(ttd),
2417 		req->retrans,
2418 		uid,
2419 		0,  /* non standard timer */
2420 		0,  /* non-standard timer */
2421 		atomic_read(&sk->sk_refcnt),
2422 		req,
2423 		len);
2424 }
2425 
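/*
 * The "tr" (timer) column uses the traditional encoding: 0 no timer pending,
 * 1 retransmit timer, 2 another timer (e.g. keepalive via sk_timer),
 * 3 TIME_WAIT, 4 zero window probe timer.
 */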
2426 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2427 {
2428 	int timer_active;
2429 	unsigned long timer_expires;
2430 	const struct tcp_sock *tp = tcp_sk(sk);
2431 	const struct inet_connection_sock *icsk = inet_csk(sk);
2432 	const struct inet_sock *inet = inet_sk(sk);
2433 	__be32 dest = inet->inet_daddr;
2434 	__be32 src = inet->inet_rcv_saddr;
2435 	__u16 destp = ntohs(inet->inet_dport);
2436 	__u16 srcp = ntohs(inet->inet_sport);
2437 	int rx_queue;
2438 
2439 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2440 		timer_active	= 1;
2441 		timer_expires	= icsk->icsk_timeout;
2442 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2443 		timer_active	= 4;
2444 		timer_expires	= icsk->icsk_timeout;
2445 	} else if (timer_pending(&sk->sk_timer)) {
2446 		timer_active	= 2;
2447 		timer_expires	= sk->sk_timer.expires;
2448 	} else {
2449 		timer_active	= 0;
2450 		timer_expires = jiffies;
2451 	}
2452 
2453 	if (sk->sk_state == TCP_LISTEN)
2454 		rx_queue = sk->sk_ack_backlog;
2455 	else
2456 		/*
2457 		 * Because we don't lock the socket, we might find a transient negative value.
2458 		 */
2459 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2460 
2461 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2462 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2463 		i, src, srcp, dest, destp, sk->sk_state,
2464 		tp->write_seq - tp->snd_una,
2465 		rx_queue,
2466 		timer_active,
2467 		jiffies_to_clock_t(timer_expires - jiffies),
2468 		icsk->icsk_retransmits,
2469 		sock_i_uid(sk),
2470 		icsk->icsk_probes_out,
2471 		sock_i_ino(sk),
2472 		atomic_read(&sk->sk_refcnt), sk,
2473 		jiffies_to_clock_t(icsk->icsk_rto),
2474 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2475 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2476 		tp->snd_cwnd,
2477 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2478 		len);
2479 }
2480 
2481 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2482 			       struct seq_file *f, int i, int *len)
2483 {
2484 	__be32 dest, src;
2485 	__u16 destp, srcp;
2486 	int ttd = tw->tw_ttd - jiffies;
2487 
2488 	if (ttd < 0)
2489 		ttd = 0;
2490 
2491 	dest  = tw->tw_daddr;
2492 	src   = tw->tw_rcv_saddr;
2493 	destp = ntohs(tw->tw_dport);
2494 	srcp  = ntohs(tw->tw_sport);
2495 
2496 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2497 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2498 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2499 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2500 		atomic_read(&tw->tw_refcnt), tw, len);
2501 }
2502 
2503 #define TMPSZ 150
2504 
2505 static int tcp4_seq_show(struct seq_file *seq, void *v)
2506 {
2507 	struct tcp_iter_state *st;
2508 	int len;
2509 
2510 	if (v == SEQ_START_TOKEN) {
2511 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2512 			   "  sl  local_address rem_address   st tx_queue "
2513 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2514 			   "inode");
2515 		goto out;
2516 	}
2517 	st = seq->private;
2518 
2519 	switch (st->state) {
2520 	case TCP_SEQ_STATE_LISTENING:
2521 	case TCP_SEQ_STATE_ESTABLISHED:
2522 		get_tcp4_sock(v, seq, st->num, &len);
2523 		break;
2524 	case TCP_SEQ_STATE_OPENREQ:
2525 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2526 		break;
2527 	case TCP_SEQ_STATE_TIME_WAIT:
2528 		get_timewait4_sock(v, seq, st->num, &len);
2529 		break;
2530 	}
2531 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2532 out:
2533 	return 0;
2534 }
2535 
2536 static const struct file_operations tcp_afinfo_seq_fops = {
2537 	.owner   = THIS_MODULE,
2538 	.open    = tcp_seq_open,
2539 	.read    = seq_read,
2540 	.llseek  = seq_lseek,
2541 	.release = seq_release_net
2542 };
2543 
2544 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2545 	.name		= "tcp",
2546 	.family		= AF_INET,
2547 	.seq_fops	= &tcp_afinfo_seq_fops,
2548 	.seq_ops	= {
2549 		.show		= tcp4_seq_show,
2550 	},
2551 };
2552 
2553 static int __net_init tcp4_proc_init_net(struct net *net)
2554 {
2555 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2556 }
2557 
2558 static void __net_exit tcp4_proc_exit_net(struct net *net)
2559 {
2560 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2561 }
2562 
2563 static struct pernet_operations tcp4_net_ops = {
2564 	.init = tcp4_proc_init_net,
2565 	.exit = tcp4_proc_exit_net,
2566 };
2567 
2568 int __init tcp4_proc_init(void)
2569 {
2570 	return register_pernet_subsys(&tcp4_net_ops);
2571 }
2572 
2573 void tcp4_proc_exit(void)
2574 {
2575 	unregister_pernet_subsys(&tcp4_net_ops);
2576 }
2577 #endif /* CONFIG_PROC_FS */
2578 
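/* GRO receive: aggregate only segments whose checksum can be trusted.  A
 * CHECKSUM_COMPLETE value is verified against the pseudo-header here; a
 * failed check or CHECKSUM_NONE flushes the packet to the normal receive
 * path, anything else proceeds to tcp_gro_receive().
 */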
2579 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2580 {
2581 	const struct iphdr *iph = skb_gro_network_header(skb);
2582 
2583 	switch (skb->ip_summed) {
2584 	case CHECKSUM_COMPLETE:
2585 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2586 				  skb->csum)) {
2587 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2588 			break;
2589 		}
2590 
2591 		/* fall through */
2592 	case CHECKSUM_NONE:
2593 		NAPI_GRO_CB(skb)->flush = 1;
2594 		return NULL;
2595 	}
2596 
2597 	return tcp_gro_receive(head, skb);
2598 }
2599 
2600 int tcp4_gro_complete(struct sk_buff *skb)
2601 {
2602 	const struct iphdr *iph = ip_hdr(skb);
2603 	struct tcphdr *th = tcp_hdr(skb);
2604 
2605 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2606 				  iph->saddr, iph->daddr, 0);
2607 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2608 
2609 	return tcp_gro_complete(skb);
2610 }
2611 
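/* The IPv4 TCP protocol descriptor.  The inet layer (af_inet.c) registers
 * this so SOCK_STREAM/IPPROTO_TCP sockets are wired up to the handlers above.
 */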
2612 struct proto tcp_prot = {
2613 	.name			= "TCP",
2614 	.owner			= THIS_MODULE,
2615 	.close			= tcp_close,
2616 	.connect		= tcp_v4_connect,
2617 	.disconnect		= tcp_disconnect,
2618 	.accept			= inet_csk_accept,
2619 	.ioctl			= tcp_ioctl,
2620 	.init			= tcp_v4_init_sock,
2621 	.destroy		= tcp_v4_destroy_sock,
2622 	.shutdown		= tcp_shutdown,
2623 	.setsockopt		= tcp_setsockopt,
2624 	.getsockopt		= tcp_getsockopt,
2625 	.recvmsg		= tcp_recvmsg,
2626 	.sendmsg		= tcp_sendmsg,
2627 	.sendpage		= tcp_sendpage,
2628 	.backlog_rcv		= tcp_v4_do_rcv,
2629 	.hash			= inet_hash,
2630 	.unhash			= inet_unhash,
2631 	.get_port		= inet_csk_get_port,
2632 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2633 	.sockets_allocated	= &tcp_sockets_allocated,
2634 	.orphan_count		= &tcp_orphan_count,
2635 	.memory_allocated	= &tcp_memory_allocated,
2636 	.memory_pressure	= &tcp_memory_pressure,
2637 	.sysctl_wmem		= sysctl_tcp_wmem,
2638 	.sysctl_rmem		= sysctl_tcp_rmem,
2639 	.max_header		= MAX_TCP_HEADER,
2640 	.obj_size		= sizeof(struct tcp_sock),
2641 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2642 	.twsk_prot		= &tcp_timewait_sock_ops,
2643 	.rsk_prot		= &tcp_request_sock_ops,
2644 	.h.hashinfo		= &tcp_hashinfo,
2645 	.no_autobind		= true,
2646 #ifdef CONFIG_COMPAT
2647 	.compat_setsockopt	= compat_tcp_setsockopt,
2648 	.compat_getsockopt	= compat_tcp_getsockopt,
2649 #endif
2650 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2651 	.init_cgroup		= tcp_init_cgroup,
2652 	.destroy_cgroup		= tcp_destroy_cgroup,
2653 	.proto_cgroup		= tcp_proto_cgroup,
2654 #endif
2655 };
2656 EXPORT_SYMBOL(tcp_prot);
2657 
2658 static int __net_init tcp_sk_init(struct net *net)
2659 {
2660 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2661 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2662 }
2663 
2664 static void __net_exit tcp_sk_exit(struct net *net)
2665 {
2666 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2667 }
2668 
2669 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2670 {
2671 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2672 }
2673 
2674 static struct pernet_operations __net_initdata tcp_sk_ops = {
2675 	.init	    = tcp_sk_init,
2676 	.exit	    = tcp_sk_exit,
2677 	.exit_batch = tcp_sk_exit_batch,
2678 };
2679 
2680 void __init tcp_v4_init(void)
2681 {
2682 	inet_hashinfo_init(&tcp_hashinfo);
2683 	if (register_pernet_subsys(&tcp_sk_ops))
2684 		panic("Failed to create the TCP control socket.\n");
2685 }
2686