xref: /openbmc/linux/net/ipv4/tcp_ipv4.c (revision 545e4006)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after a
45  *					year-long coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 
54 #include <linux/types.h>
55 #include <linux/fcntl.h>
56 #include <linux/module.h>
57 #include <linux/random.h>
58 #include <linux/cache.h>
59 #include <linux/jhash.h>
60 #include <linux/init.h>
61 #include <linux/times.h>
62 
63 #include <net/net_namespace.h>
64 #include <net/icmp.h>
65 #include <net/inet_hashtables.h>
66 #include <net/tcp.h>
67 #include <net/transp_v6.h>
68 #include <net/ipv6.h>
69 #include <net/inet_common.h>
70 #include <net/timewait_sock.h>
71 #include <net/xfrm.h>
72 #include <net/netdma.h>
73 
74 #include <linux/inet.h>
75 #include <linux/ipv6.h>
76 #include <linux/stddef.h>
77 #include <linux/proc_fs.h>
78 #include <linux/seq_file.h>
79 
80 #include <linux/crypto.h>
81 #include <linux/scatterlist.h>
82 
83 int sysctl_tcp_tw_reuse __read_mostly;
84 int sysctl_tcp_low_latency __read_mostly;
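/* Both knobs are exported through sysctl under net.ipv4 (a rough sketch
 * of typical usage, assuming the standard proc paths):
 *
 *	# sysctl -w net.ipv4.tcp_tw_reuse=1
 *	# echo 1 > /proc/sys/net/ipv4/tcp_low_latency
 *
 * tcp_tw_reuse lets new outgoing connections reuse TIME-WAIT sockets
 * (see tcp_twsk_unique() below); tcp_low_latency makes the receive path
 * prefer low latency over throughput (it bypasses prequeue processing).
 */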
85 
86 
87 #ifdef CONFIG_TCP_MD5SIG
88 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
89 						   __be32 addr);
90 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
91 			       __be32 daddr, __be32 saddr, struct tcphdr *th);
92 #else
93 static inline
94 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
95 {
96 	return NULL;
97 }
98 #endif
99 
100 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
101 	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
102 	.lhash_users = ATOMIC_INIT(0),
103 	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
104 };
105 
106 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
107 {
108 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
109 					  ip_hdr(skb)->saddr,
110 					  tcp_hdr(skb)->dest,
111 					  tcp_hdr(skb)->source);
112 }
113 
114 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
115 {
116 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
117 	struct tcp_sock *tp = tcp_sk(sk);
118 
119 	/* With PAWS, it is safe from the viewpoint
120 	   of data integrity. Even without PAWS it is safe provided sequence
121 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
122 
123 	   Actually, the idea is close to VJ's, only the timestamp cache is
124 	   held not per host but per port pair, and the TW bucket is used
125 	   as the state holder.
126 
127 	   If the TW bucket has already been destroyed we fall back to VJ's
128 	   scheme and use the initial timestamp retrieved from the peer table.
129 	 */
130 	if (tcptw->tw_ts_recent_stamp &&
131 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
132 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
133 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
134 		if (tp->write_seq == 0)
135 			tp->write_seq = 1;
136 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
137 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
138 		sock_hold(sktw);
139 		return 1;
140 	}
141 
142 	return 0;
143 }
144 
145 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146 
147 /* This will initiate an outgoing connection. */
148 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
149 {
150 	struct inet_sock *inet = inet_sk(sk);
151 	struct tcp_sock *tp = tcp_sk(sk);
152 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
153 	struct rtable *rt;
154 	__be32 daddr, nexthop;
155 	int tmp;
156 	int err;
157 
158 	if (addr_len < sizeof(struct sockaddr_in))
159 		return -EINVAL;
160 
161 	if (usin->sin_family != AF_INET)
162 		return -EAFNOSUPPORT;
163 
164 	nexthop = daddr = usin->sin_addr.s_addr;
165 	if (inet->opt && inet->opt->srr) {
166 		if (!daddr)
167 			return -EINVAL;
168 		nexthop = inet->opt->faddr;
169 	}
170 
171 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
172 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173 			       IPPROTO_TCP,
174 			       inet->sport, usin->sin_port, sk, 1);
175 	if (tmp < 0) {
176 		if (tmp == -ENETUNREACH)
177 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
178 		return tmp;
179 	}
180 
181 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
182 		ip_rt_put(rt);
183 		return -ENETUNREACH;
184 	}
185 
186 	if (!inet->opt || !inet->opt->srr)
187 		daddr = rt->rt_dst;
188 
189 	if (!inet->saddr)
190 		inet->saddr = rt->rt_src;
191 	inet->rcv_saddr = inet->saddr;
192 
193 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
194 		/* Reset inherited state */
195 		tp->rx_opt.ts_recent	   = 0;
196 		tp->rx_opt.ts_recent_stamp = 0;
197 		tp->write_seq		   = 0;
198 	}
199 
200 	if (tcp_death_row.sysctl_tw_recycle &&
201 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
202 		struct inet_peer *peer = rt_get_peer(rt);
203 		/*
204 		 * VJ's idea. We save last timestamp seen from
205 		 * the destination in peer table, when entering state
206 		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
207 		 * when trying new connection.
208 		 */
209 		if (peer != NULL &&
210 		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
211 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
212 			tp->rx_opt.ts_recent = peer->tcp_ts;
213 		}
214 	}
215 
216 	inet->dport = usin->sin_port;
217 	inet->daddr = daddr;
218 
219 	inet_csk(sk)->icsk_ext_hdr_len = 0;
220 	if (inet->opt)
221 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
222 
223 	tp->rx_opt.mss_clamp = 536;
224 
225 	/* Socket identity is still unknown (sport may be zero).
226 	 * However we set state to SYN-SENT and, without releasing the socket
227 	 * lock, select a source port, enter ourselves into the hash tables and
228 	 * complete initialization after this.
229 	 */
230 	tcp_set_state(sk, TCP_SYN_SENT);
231 	err = inet_hash_connect(&tcp_death_row, sk);
232 	if (err)
233 		goto failure;
234 
235 	err = ip_route_newports(&rt, IPPROTO_TCP,
236 				inet->sport, inet->dport, sk);
237 	if (err)
238 		goto failure;
239 
240 	/* OK, now commit destination to socket.  */
241 	sk->sk_gso_type = SKB_GSO_TCPV4;
242 	sk_setup_caps(sk, &rt->u.dst);
243 
244 	if (!tp->write_seq)
245 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
246 							   inet->daddr,
247 							   inet->sport,
248 							   usin->sin_port);
249 
250 	inet->id = tp->write_seq ^ jiffies;
251 
252 	err = tcp_connect(sk);
253 	rt = NULL;
254 	if (err)
255 		goto failure;
256 
257 	return 0;
258 
259 failure:
260 	/*
261 	 * This unhashes the socket and releases the local port,
262 	 * if necessary.
263 	 */
264 	tcp_set_state(sk, TCP_CLOSE);
265 	ip_rt_put(rt);
266 	sk->sk_route_caps = 0;
267 	inet->dport = 0;
268 	return err;
269 }
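/* For orientation: this is the function that ultimately runs when user
 * space calls connect() on an IPv4 TCP socket.  A minimal, hypothetical
 * caller looks roughly like:
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(80),
 *	};
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *
 * The call reaches here via sys_connect() and inet_stream_connect().
 */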
270 
271 /*
272  * This routine does path mtu discovery as defined in RFC1191.
273  */
274 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
275 {
276 	struct dst_entry *dst;
277 	struct inet_sock *inet = inet_sk(sk);
278 
279 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
280 	 * sent out by Linux are always < 576 bytes, so they should go through
281 	 * unfragmented).
282 	 */
283 	if (sk->sk_state == TCP_LISTEN)
284 		return;
285 
286 	/* We don't check in the dst entry whether PMTU discovery is forbidden
287 	 * on this route. We just assume that no packet-too-big packets
288 	 * are sent back when PMTU discovery is not active.
289 	 * There is a small race when the user changes this flag in the
290 	 * route, but I think that's acceptable.
291 	 */
292 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
293 		return;
294 
295 	dst->ops->update_pmtu(dst, mtu);
296 
297 	/* Something is about to go wrong... Remember the soft error
298 	 * in case this connection is not able to recover.
299 	 */
300 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
301 		sk->sk_err_soft = EMSGSIZE;
302 
303 	mtu = dst_mtu(dst);
304 
305 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
306 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
307 		tcp_sync_mss(sk, mtu);
308 
309 		/* Resend the TCP packet because it's
310 		 * clear that the old packet has been
311 		 * dropped. This is the new "fast" path mtu
312 		 * discovery.
313 		 */
314 		tcp_simple_retransmit(sk);
315 	} /* else let the usual retransmit timer handle it */
316 }
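/* Note: the "fast" path MTU discovery above relies on icsk_pmtu_cookie,
 * the per-socket cache of the last path MTU handed to tcp_sync_mss().
 * When an ICMP_FRAG_NEEDED arrives with a smaller MTU, the MSS is shrunk
 * and the data retransmitted immediately instead of waiting for the
 * retransmit timer to fire.
 */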
317 
318 /*
319  * This routine is called by the ICMP module when it gets some
320  * sort of error condition.  If err < 0 then the socket should
321  * be closed and the error returned to the user.  If err > 0
322  * it's just the icmp type << 8 | icmp code.  After adjustment
323  * header points to the first 8 bytes of the tcp header.  We need
324  * to find the appropriate port.
325  *
326  * The locking strategy used here is very "optimistic". When
327  * someone else accesses the socket the ICMP is just dropped
328  * and for some paths there is no check at all.
329  * A more general error queue to queue errors for later handling
330  * is probably better.
331  *
332  */
333 
334 void tcp_v4_err(struct sk_buff *skb, u32 info)
335 {
336 	struct iphdr *iph = (struct iphdr *)skb->data;
337 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
338 	struct tcp_sock *tp;
339 	struct inet_sock *inet;
340 	const int type = icmp_hdr(skb)->type;
341 	const int code = icmp_hdr(skb)->code;
342 	struct sock *sk;
343 	__u32 seq;
344 	int err;
345 	struct net *net = dev_net(skb->dev);
346 
347 	if (skb->len < (iph->ihl << 2) + 8) {
348 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
349 		return;
350 	}
351 
352 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
353 			iph->saddr, th->source, inet_iif(skb));
354 	if (!sk) {
355 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
356 		return;
357 	}
358 	if (sk->sk_state == TCP_TIME_WAIT) {
359 		inet_twsk_put(inet_twsk(sk));
360 		return;
361 	}
362 
363 	bh_lock_sock(sk);
364 	/* If too many ICMPs get dropped on busy
365 	 * servers this needs to be solved differently.
366 	 */
367 	if (sock_owned_by_user(sk))
368 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
369 
370 	if (sk->sk_state == TCP_CLOSE)
371 		goto out;
372 
373 	tp = tcp_sk(sk);
374 	seq = ntohl(th->seq);
375 	if (sk->sk_state != TCP_LISTEN &&
376 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
377 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
378 		goto out;
379 	}
380 
381 	switch (type) {
382 	case ICMP_SOURCE_QUENCH:
383 		/* Just silently ignore these. */
384 		goto out;
385 	case ICMP_PARAMETERPROB:
386 		err = EPROTO;
387 		break;
388 	case ICMP_DEST_UNREACH:
389 		if (code > NR_ICMP_UNREACH)
390 			goto out;
391 
392 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
393 			if (!sock_owned_by_user(sk))
394 				do_pmtu_discovery(sk, iph, info);
395 			goto out;
396 		}
397 
398 		err = icmp_err_convert[code].errno;
399 		break;
400 	case ICMP_TIME_EXCEEDED:
401 		err = EHOSTUNREACH;
402 		break;
403 	default:
404 		goto out;
405 	}
406 
407 	switch (sk->sk_state) {
408 		struct request_sock *req, **prev;
409 	case TCP_LISTEN:
410 		if (sock_owned_by_user(sk))
411 			goto out;
412 
413 		req = inet_csk_search_req(sk, &prev, th->dest,
414 					  iph->daddr, iph->saddr);
415 		if (!req)
416 			goto out;
417 
418 		/* ICMPs are not backlogged, hence we cannot get
419 		   an established socket here.
420 		 */
421 		BUG_TRAP(!req->sk);
422 
423 		if (seq != tcp_rsk(req)->snt_isn) {
424 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
425 			goto out;
426 		}
427 
428 		/*
429 		 * Still in SYN_RECV, just remove it silently.
430 		 * There is no good way to pass the error to the newly
431 		 * created socket, and POSIX does not want network
432 		 * errors returned from accept().
433 		 */
434 		inet_csk_reqsk_queue_drop(sk, req, prev);
435 		goto out;
436 
437 	case TCP_SYN_SENT:
438 	case TCP_SYN_RECV:  /* Cannot happen.
439 			       It can happen, for example, if SYNs crossed.
440 			     */
441 		if (!sock_owned_by_user(sk)) {
442 			sk->sk_err = err;
443 
444 			sk->sk_error_report(sk);
445 
446 			tcp_done(sk);
447 		} else {
448 			sk->sk_err_soft = err;
449 		}
450 		goto out;
451 	}
452 
453 	/* If we've already connected we will keep trying
454 	 * until we time out, or the user gives up.
455 	 *
456 	 * RFC 1122 4.2.3.9 allows us to treat as hard errors only
457 	 * PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
458 	 * but it is obsoleted by PMTU discovery).
459 	 *
460 	 * Note that in the modern internet, where routing is unreliable and
461 	 * broken firewalls sit in every dark corner sending random errors as
462 	 * ordered by their masters, even these two messages have finally lost
463 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
464 	 *
465 	 * Now we are in compliance with RFCs.
466 	 *							--ANK (980905)
467 	 */
468 
469 	inet = inet_sk(sk);
470 	if (!sock_owned_by_user(sk) && inet->recverr) {
471 		sk->sk_err = err;
472 		sk->sk_error_report(sk);
473 	} else	{ /* Only an error on timeout */
474 		sk->sk_err_soft = err;
475 	}
476 
477 out:
478 	bh_unlock_sock(sk);
479 	sock_put(sk);
480 }
481 
482 /* This routine computes an IPv4 TCP checksum. */
483 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
484 {
485 	struct inet_sock *inet = inet_sk(sk);
486 	struct tcphdr *th = tcp_hdr(skb);
487 
488 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
489 		th->check = ~tcp_v4_check(len, inet->saddr,
490 					  inet->daddr, 0);
491 		skb->csum_start = skb_transport_header(skb) - skb->head;
492 		skb->csum_offset = offsetof(struct tcphdr, check);
493 	} else {
494 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
495 					 csum_partial((char *)th,
496 						      th->doff << 2,
497 						      skb->csum));
498 	}
499 }
500 
501 int tcp_v4_gso_send_check(struct sk_buff *skb)
502 {
503 	const struct iphdr *iph;
504 	struct tcphdr *th;
505 
506 	if (!pskb_may_pull(skb, sizeof(*th)))
507 		return -EINVAL;
508 
509 	iph = ip_hdr(skb);
510 	th = tcp_hdr(skb);
511 
512 	th->check = 0;
513 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
514 	skb->csum_start = skb_transport_header(skb) - skb->head;
515 	skb->csum_offset = offsetof(struct tcphdr, check);
516 	skb->ip_summed = CHECKSUM_PARTIAL;
517 	return 0;
518 }
519 
520 /*
521  *	This routine will send an RST to the other tcp.
522  *
523  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
524  *		      for the reset?
525  *	Answer: if a packet caused an RST, it is not for a socket
526  *		existing in our system; if it is matched to a socket,
527  *		it is just a duplicate segment or a bug in the other
528  *		side's TCP.  So we build the reply based only on
529  *		parameters that arrived with the segment.
530  *	Exception: precedence violation. We do not implement it in any case.
531  */
532 
533 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
534 {
535 	struct tcphdr *th = tcp_hdr(skb);
536 	struct {
537 		struct tcphdr th;
538 #ifdef CONFIG_TCP_MD5SIG
539 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
540 #endif
541 	} rep;
542 	struct ip_reply_arg arg;
543 #ifdef CONFIG_TCP_MD5SIG
544 	struct tcp_md5sig_key *key;
545 #endif
546 	struct net *net;
547 
548 	/* Never send a reset in response to a reset. */
549 	if (th->rst)
550 		return;
551 
552 	if (skb->rtable->rt_type != RTN_LOCAL)
553 		return;
554 
555 	/* Swap the send and the receive. */
556 	memset(&rep, 0, sizeof(rep));
557 	rep.th.dest   = th->source;
558 	rep.th.source = th->dest;
559 	rep.th.doff   = sizeof(struct tcphdr) / 4;
560 	rep.th.rst    = 1;
561 
562 	if (th->ack) {
563 		rep.th.seq = th->ack_seq;
564 	} else {
565 		rep.th.ack = 1;
566 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
567 				       skb->len - (th->doff << 2));
568 	}
569 
570 	memset(&arg, 0, sizeof(arg));
571 	arg.iov[0].iov_base = (unsigned char *)&rep;
572 	arg.iov[0].iov_len  = sizeof(rep.th);
573 
574 #ifdef CONFIG_TCP_MD5SIG
575 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
576 	if (key) {
577 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
578 				   (TCPOPT_NOP << 16) |
579 				   (TCPOPT_MD5SIG << 8) |
580 				   TCPOLEN_MD5SIG);
581 		/* Update length and the length the header thinks exists */
582 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
583 		rep.th.doff = arg.iov[0].iov_len / 4;
584 
585 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
586 				     key, ip_hdr(skb)->daddr,
587 				     ip_hdr(skb)->saddr, &rep.th);
588 	}
589 #endif
590 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
591 				      ip_hdr(skb)->saddr, /* XXX */
592 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
593 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
594 
595 	net = dev_net(skb->dst->dev);
596 	ip_send_reply(net->ipv4.tcp_sock, skb,
597 		      &arg, arg.iov[0].iov_len);
598 
599 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
600 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
601 }
602 
603 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
604    outside socket context, is certainly ugly. What can I do?
605  */
606 
607 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
608 			    u32 win, u32 ts, int oif,
609 			    struct tcp_md5sig_key *key)
610 {
611 	struct tcphdr *th = tcp_hdr(skb);
612 	struct {
613 		struct tcphdr th;
614 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
615 #ifdef CONFIG_TCP_MD5SIG
616 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
617 #endif
618 			];
619 	} rep;
620 	struct ip_reply_arg arg;
621 	struct net *net = dev_net(skb->dev);
622 
623 	memset(&rep.th, 0, sizeof(struct tcphdr));
624 	memset(&arg, 0, sizeof(arg));
625 
626 	arg.iov[0].iov_base = (unsigned char *)&rep;
627 	arg.iov[0].iov_len  = sizeof(rep.th);
628 	if (ts) {
629 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
630 				   (TCPOPT_TIMESTAMP << 8) |
631 				   TCPOLEN_TIMESTAMP);
632 		rep.opt[1] = htonl(tcp_time_stamp);
633 		rep.opt[2] = htonl(ts);
634 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
635 	}
636 
637 	/* Swap the send and the receive. */
638 	rep.th.dest    = th->source;
639 	rep.th.source  = th->dest;
640 	rep.th.doff    = arg.iov[0].iov_len / 4;
641 	rep.th.seq     = htonl(seq);
642 	rep.th.ack_seq = htonl(ack);
643 	rep.th.ack     = 1;
644 	rep.th.window  = htons(win);
645 
646 #ifdef CONFIG_TCP_MD5SIG
647 	if (key) {
648 		int offset = (ts) ? 3 : 0;
649 
650 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
651 					  (TCPOPT_NOP << 16) |
652 					  (TCPOPT_MD5SIG << 8) |
653 					  TCPOLEN_MD5SIG);
654 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
655 		rep.th.doff = arg.iov[0].iov_len/4;
656 
657 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
658 				    key, ip_hdr(skb)->daddr,
659 				    ip_hdr(skb)->saddr, &rep.th);
660 	}
661 #endif
662 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
663 				      ip_hdr(skb)->saddr, /* XXX */
664 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
665 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
666 	if (oif)
667 		arg.bound_dev_if = oif;
668 
669 	ip_send_reply(net->ipv4.tcp_sock, skb,
670 		      &arg, arg.iov[0].iov_len);
671 
672 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
673 }
674 
675 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
676 {
677 	struct inet_timewait_sock *tw = inet_twsk(sk);
678 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
679 
680 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
681 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
682 			tcptw->tw_ts_recent,
683 			tw->tw_bound_dev_if,
684 			tcp_twsk_md5_key(tcptw)
685 			);
686 
687 	inet_twsk_put(tw);
688 }
689 
690 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
691 				  struct request_sock *req)
692 {
693 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
694 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
695 			req->ts_recent,
696 			0,
697 			tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr));
698 }
699 
700 /*
701  *	Send a SYN-ACK after having received a SYN.
702  *	This still operates on a request_sock only, not on a big
703  *	socket.
704  */
705 static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
706 				struct dst_entry *dst)
707 {
708 	const struct inet_request_sock *ireq = inet_rsk(req);
709 	int err = -1;
710 	struct sk_buff * skb;
711 
712 	/* First, grab a route. */
713 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
714 		return -1;
715 
716 	skb = tcp_make_synack(sk, dst, req);
717 
718 	if (skb) {
719 		struct tcphdr *th = tcp_hdr(skb);
720 
721 		th->check = tcp_v4_check(skb->len,
722 					 ireq->loc_addr,
723 					 ireq->rmt_addr,
724 					 csum_partial((char *)th, skb->len,
725 						      skb->csum));
726 
727 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
728 					    ireq->rmt_addr,
729 					    ireq->opt);
730 		err = net_xmit_eval(err);
731 	}
732 
733 	dst_release(dst);
734 	return err;
735 }
736 
737 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
738 {
739 	return __tcp_v4_send_synack(sk, req, NULL);
740 }
741 
742 /*
743  *	IPv4 request_sock destructor.
744  */
745 static void tcp_v4_reqsk_destructor(struct request_sock *req)
746 {
747 	kfree(inet_rsk(req)->opt);
748 }
749 
750 #ifdef CONFIG_SYN_COOKIES
751 static void syn_flood_warning(struct sk_buff *skb)
752 {
753 	static unsigned long warntime;
754 
755 	if (time_after(jiffies, (warntime + HZ * 60))) {
756 		warntime = jiffies;
757 		printk(KERN_INFO
758 		       "possible SYN flooding on port %d. Sending cookies.\n",
759 		       ntohs(tcp_hdr(skb)->dest));
760 	}
761 }
762 #endif
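/* Note: syn_flood_warning() only fires when cookies are actually being
 * sent, which requires the net.ipv4.tcp_syncookies sysctl to be enabled
 * and the SYN queue to be full; see the sysctl_tcp_syncookies check in
 * tcp_v4_conn_request() below.
 */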
763 
764 /*
765  * Save and compile IPv4 options into the request_sock if needed.
766  */
767 static struct ip_options *tcp_v4_save_options(struct sock *sk,
768 					      struct sk_buff *skb)
769 {
770 	struct ip_options *opt = &(IPCB(skb)->opt);
771 	struct ip_options *dopt = NULL;
772 
773 	if (opt && opt->optlen) {
774 		int opt_size = optlength(opt);
775 		dopt = kmalloc(opt_size, GFP_ATOMIC);
776 		if (dopt) {
777 			if (ip_options_echo(dopt, skb)) {
778 				kfree(dopt);
779 				dopt = NULL;
780 			}
781 		}
782 	}
783 	return dopt;
784 }
785 
786 #ifdef CONFIG_TCP_MD5SIG
787 /*
788  * RFC2385 MD5 checksumming requires a mapping of
789  * IP address->MD5 Key.
790  * We need to maintain these in the sk structure.
791  */
792 
793 /* Find the Key structure for an address.  */
794 static struct tcp_md5sig_key *
795 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
796 {
797 	struct tcp_sock *tp = tcp_sk(sk);
798 	int i;
799 
800 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
801 		return NULL;
802 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
803 		if (tp->md5sig_info->keys4[i].addr == addr)
804 			return &tp->md5sig_info->keys4[i].base;
805 	}
806 	return NULL;
807 }
808 
809 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
810 					 struct sock *addr_sk)
811 {
812 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
813 }
814 
815 EXPORT_SYMBOL(tcp_v4_md5_lookup);
816 
817 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
818 						      struct request_sock *req)
819 {
820 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
821 }
822 
823 /* This can be called on a newly created socket, from other files */
824 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
825 		      u8 *newkey, u8 newkeylen)
826 {
827 	/* Add Key to the list */
828 	struct tcp_md5sig_key *key;
829 	struct tcp_sock *tp = tcp_sk(sk);
830 	struct tcp4_md5sig_key *keys;
831 
832 	key = tcp_v4_md5_do_lookup(sk, addr);
833 	if (key) {
834 		/* Pre-existing entry - just update that one. */
835 		kfree(key->key);
836 		key->key = newkey;
837 		key->keylen = newkeylen;
838 	} else {
839 		struct tcp_md5sig_info *md5sig;
840 
841 		if (!tp->md5sig_info) {
842 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
843 						  GFP_ATOMIC);
844 			if (!tp->md5sig_info) {
845 				kfree(newkey);
846 				return -ENOMEM;
847 			}
848 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
849 		}
850 		if (tcp_alloc_md5sig_pool() == NULL) {
851 			kfree(newkey);
852 			return -ENOMEM;
853 		}
854 		md5sig = tp->md5sig_info;
855 
856 		if (md5sig->alloced4 == md5sig->entries4) {
857 			keys = kmalloc((sizeof(*keys) *
858 					(md5sig->entries4 + 1)), GFP_ATOMIC);
859 			if (!keys) {
860 				kfree(newkey);
861 				tcp_free_md5sig_pool();
862 				return -ENOMEM;
863 			}
864 
865 			if (md5sig->entries4)
866 				memcpy(keys, md5sig->keys4,
867 				       sizeof(*keys) * md5sig->entries4);
868 
869 			/* Free old key list, and reference new one */
870 			kfree(md5sig->keys4);
871 			md5sig->keys4 = keys;
872 			md5sig->alloced4++;
873 		}
874 		md5sig->entries4++;
875 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
876 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
877 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
878 	}
879 	return 0;
880 }
881 
882 EXPORT_SYMBOL(tcp_v4_md5_do_add);
883 
884 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
885 			       u8 *newkey, u8 newkeylen)
886 {
887 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
888 				 newkey, newkeylen);
889 }
890 
891 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
892 {
893 	struct tcp_sock *tp = tcp_sk(sk);
894 	int i;
895 
896 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
897 		if (tp->md5sig_info->keys4[i].addr == addr) {
898 			/* Free the key */
899 			kfree(tp->md5sig_info->keys4[i].base.key);
900 			tp->md5sig_info->entries4--;
901 
902 			if (tp->md5sig_info->entries4 == 0) {
903 				kfree(tp->md5sig_info->keys4);
904 				tp->md5sig_info->keys4 = NULL;
905 				tp->md5sig_info->alloced4 = 0;
906 			} else if (tp->md5sig_info->entries4 != i) {
907 				/* Shift the remaining keys down over the removed one */
908 				memmove(&tp->md5sig_info->keys4[i],
909 					&tp->md5sig_info->keys4[i+1],
910 					(tp->md5sig_info->entries4 - i) *
911 					 sizeof(struct tcp4_md5sig_key));
912 			}
913 			tcp_free_md5sig_pool();
914 			return 0;
915 		}
916 	}
917 	return -ENOENT;
918 }
919 
920 EXPORT_SYMBOL(tcp_v4_md5_do_del);
921 
922 static void tcp_v4_clear_md5_list(struct sock *sk)
923 {
924 	struct tcp_sock *tp = tcp_sk(sk);
925 
926 	/* Free each key, then the array holding the keys,
927 	 * the crypto element, and then decrement our
928 	 * hold on the last-resort crypto.
929 	 */
930 	if (tp->md5sig_info->entries4) {
931 		int i;
932 		for (i = 0; i < tp->md5sig_info->entries4; i++)
933 			kfree(tp->md5sig_info->keys4[i].base.key);
934 		tp->md5sig_info->entries4 = 0;
935 		tcp_free_md5sig_pool();
936 	}
937 	if (tp->md5sig_info->keys4) {
938 		kfree(tp->md5sig_info->keys4);
939 		tp->md5sig_info->keys4 = NULL;
940 		tp->md5sig_info->alloced4  = 0;
941 	}
942 }
943 
944 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
945 				 int optlen)
946 {
947 	struct tcp_md5sig cmd;
948 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
949 	u8 *newkey;
950 
951 	if (optlen < sizeof(cmd))
952 		return -EINVAL;
953 
954 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
955 		return -EFAULT;
956 
957 	if (sin->sin_family != AF_INET)
958 		return -EINVAL;
959 
960 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
961 		if (!tcp_sk(sk)->md5sig_info)
962 			return -ENOENT;
963 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
964 	}
965 
966 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
967 		return -EINVAL;
968 
969 	if (!tcp_sk(sk)->md5sig_info) {
970 		struct tcp_sock *tp = tcp_sk(sk);
971 		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
972 
973 		if (!p)
974 			return -EINVAL;
975 
976 		tp->md5sig_info = p;
977 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
978 	}
979 
980 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
981 	if (!newkey)
982 		return -ENOMEM;
983 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
984 				 newkey, cmd.tcpm_keylen);
985 }
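/* A rough user-space sketch of how this option is driven; the field
 * names are the ones handled above, error handling is omitted:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	sin->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.2", &sin->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing an empty key (zero tcpm_keylen) deletes the key for that peer
 * address, as handled in tcp_v4_parse_md5_keys() above.
 */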
986 
987 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
988 					__be32 daddr, __be32 saddr, int nbytes)
989 {
990 	struct tcp4_pseudohdr *bp;
991 	struct scatterlist sg;
992 
993 	bp = &hp->md5_blk.ip4;
994 
995 	/*
996 	 * 1. the TCP pseudo-header (in the order: source IP address,
997 	 * destination IP address, zero-padded protocol number, and
998 	 * segment length)
999 	 */
1000 	bp->saddr = saddr;
1001 	bp->daddr = daddr;
1002 	bp->pad = 0;
1003 	bp->protocol = IPPROTO_TCP;
1004 	bp->len = cpu_to_be16(nbytes);
1005 
1006 	sg_init_one(&sg, bp, sizeof(*bp));
1007 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1008 }
1009 
1010 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1011 			       __be32 daddr, __be32 saddr, struct tcphdr *th)
1012 {
1013 	struct tcp_md5sig_pool *hp;
1014 	struct hash_desc *desc;
1015 
1016 	hp = tcp_get_md5sig_pool();
1017 	if (!hp)
1018 		goto clear_hash_noput;
1019 	desc = &hp->md5_desc;
1020 
1021 	if (crypto_hash_init(desc))
1022 		goto clear_hash;
1023 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1024 		goto clear_hash;
1025 	if (tcp_md5_hash_header(hp, th))
1026 		goto clear_hash;
1027 	if (tcp_md5_hash_key(hp, key))
1028 		goto clear_hash;
1029 	if (crypto_hash_final(desc, md5_hash))
1030 		goto clear_hash;
1031 
1032 	tcp_put_md5sig_pool();
1033 	return 0;
1034 
1035 clear_hash:
1036 	tcp_put_md5sig_pool();
1037 clear_hash_noput:
1038 	memset(md5_hash, 0, 16);
1039 	return 1;
1040 }
1041 
1042 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1043 			struct sock *sk, struct request_sock *req,
1044 			struct sk_buff *skb)
1045 {
1046 	struct tcp_md5sig_pool *hp;
1047 	struct hash_desc *desc;
1048 	struct tcphdr *th = tcp_hdr(skb);
1049 	__be32 saddr, daddr;
1050 
1051 	if (sk) {
1052 		saddr = inet_sk(sk)->saddr;
1053 		daddr = inet_sk(sk)->daddr;
1054 	} else if (req) {
1055 		saddr = inet_rsk(req)->loc_addr;
1056 		daddr = inet_rsk(req)->rmt_addr;
1057 	} else {
1058 		const struct iphdr *iph = ip_hdr(skb);
1059 		saddr = iph->saddr;
1060 		daddr = iph->daddr;
1061 	}
1062 
1063 	hp = tcp_get_md5sig_pool();
1064 	if (!hp)
1065 		goto clear_hash_noput;
1066 	desc = &hp->md5_desc;
1067 
1068 	if (crypto_hash_init(desc))
1069 		goto clear_hash;
1070 
1071 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1072 		goto clear_hash;
1073 	if (tcp_md5_hash_header(hp, th))
1074 		goto clear_hash;
1075 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1076 		goto clear_hash;
1077 	if (tcp_md5_hash_key(hp, key))
1078 		goto clear_hash;
1079 	if (crypto_hash_final(desc, md5_hash))
1080 		goto clear_hash;
1081 
1082 	tcp_put_md5sig_pool();
1083 	return 0;
1084 
1085 clear_hash:
1086 	tcp_put_md5sig_pool();
1087 clear_hash_noput:
1088 	memset(md5_hash, 0, 16);
1089 	return 1;
1090 }
1091 
1092 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
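/* For clarity: the digest computed above covers, in order, the IPv4
 * pseudo-header, the TCP header, the TCP segment data and finally the
 * key itself - the sequence of tcp_md5_hash_*() calls in
 * tcp_v4_md5_hash_skb() mirrors the RFC 2385 definition.
 */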
1093 
1094 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1095 {
1096 	/*
1097 	 * This gets called for each TCP segment that arrives
1098 	 * so we want to be efficient.
1099 	 * We have 3 drop cases:
1100 	 * o No MD5 hash and one expected.
1101 	 * o MD5 hash and we're not expecting one.
1102 	 * o MD5 hash and it's wrong.
1103 	 */
1104 	__u8 *hash_location = NULL;
1105 	struct tcp_md5sig_key *hash_expected;
1106 	const struct iphdr *iph = ip_hdr(skb);
1107 	struct tcphdr *th = tcp_hdr(skb);
1108 	int genhash;
1109 	unsigned char newhash[16];
1110 
1111 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1112 	hash_location = tcp_parse_md5sig_option(th);
1113 
1114 	/* We've parsed the options - do we have a hash? */
1115 	if (!hash_expected && !hash_location)
1116 		return 0;
1117 
1118 	if (hash_expected && !hash_location) {
1119 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
1120 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1121 			       NIPQUAD(iph->saddr), ntohs(th->source),
1122 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1123 		return 1;
1124 	}
1125 
1126 	if (!hash_expected && hash_location) {
1127 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
1128 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1129 			       NIPQUAD(iph->saddr), ntohs(th->source),
1130 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1131 		return 1;
1132 	}
1133 
1134 	/* Okay, so we have both hash_expected and hash_location -
1135 	 * now we need to calculate the hash and compare.
1136 	 */
1137 	genhash = tcp_v4_md5_hash_skb(newhash,
1138 				      hash_expected,
1139 				      NULL, NULL, skb);
1140 
1141 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1142 		if (net_ratelimit()) {
1143 			printk(KERN_INFO "MD5 Hash failed for "
1144 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1145 			       NIPQUAD(iph->saddr), ntohs(th->source),
1146 			       NIPQUAD(iph->daddr), ntohs(th->dest),
1147 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1148 		}
1149 		return 1;
1150 	}
1151 	return 0;
1152 }
1153 
1154 #endif
1155 
1156 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1157 	.family		=	PF_INET,
1158 	.obj_size	=	sizeof(struct tcp_request_sock),
1159 	.rtx_syn_ack	=	tcp_v4_send_synack,
1160 	.send_ack	=	tcp_v4_reqsk_send_ack,
1161 	.destructor	=	tcp_v4_reqsk_destructor,
1162 	.send_reset	=	tcp_v4_send_reset,
1163 };
1164 
1165 #ifdef CONFIG_TCP_MD5SIG
1166 static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1167 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1168 };
1169 #endif
1170 
1171 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1172 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1173 	.twsk_unique	= tcp_twsk_unique,
1174 	.twsk_destructor= tcp_twsk_destructor,
1175 };
1176 
1177 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1178 {
1179 	struct inet_request_sock *ireq;
1180 	struct tcp_options_received tmp_opt;
1181 	struct request_sock *req;
1182 	__be32 saddr = ip_hdr(skb)->saddr;
1183 	__be32 daddr = ip_hdr(skb)->daddr;
1184 	__u32 isn = TCP_SKB_CB(skb)->when;
1185 	struct dst_entry *dst = NULL;
1186 #ifdef CONFIG_SYN_COOKIES
1187 	int want_cookie = 0;
1188 #else
1189 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1190 #endif
1191 
1192 	/* Never answer SYNs sent to broadcast or multicast addresses */
1193 	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1194 		goto drop;
1195 
1196 	/* TW buckets are converted to open requests without
1197 	 * limitation; they conserve resources and the peer is
1198 	 * evidently a real one.
1199 	 */
1200 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1201 #ifdef CONFIG_SYN_COOKIES
1202 		if (sysctl_tcp_syncookies) {
1203 			want_cookie = 1;
1204 		} else
1205 #endif
1206 		goto drop;
1207 	}
1208 
1209 	/* The accept backlog is full. If we have already queued enough
1210 	 * warm entries in the SYN queue, drop the request. That is better
1211 	 * than clogging the SYN queue with openreqs with exponentially
1212 	 * increasing timeouts.
1213 	 */
1214 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1215 		goto drop;
1216 
1217 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1218 	if (!req)
1219 		goto drop;
1220 
1221 #ifdef CONFIG_TCP_MD5SIG
1222 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1223 #endif
1224 
1225 	tcp_clear_options(&tmp_opt);
1226 	tmp_opt.mss_clamp = 536;
1227 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1228 
1229 	tcp_parse_options(skb, &tmp_opt, 0);
1230 
1231 	if (want_cookie && !tmp_opt.saw_tstamp)
1232 		tcp_clear_options(&tmp_opt);
1233 
1234 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1235 		/* Some OSes (unknown ones, but I see them on a web server whose
1236 		 * content is interesting only to Windows users) do not send
1237 		 * their timestamp in the SYN. It is an easy case:
1238 		 * we simply do not advertise TS support.
1239 		 */
1240 		tmp_opt.saw_tstamp = 0;
1241 		tmp_opt.tstamp_ok  = 0;
1242 	}
1243 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1244 
1245 	tcp_openreq_init(req, &tmp_opt, skb);
1246 
1247 	if (security_inet_conn_request(sk, skb, req))
1248 		goto drop_and_free;
1249 
1250 	ireq = inet_rsk(req);
1251 	ireq->loc_addr = daddr;
1252 	ireq->rmt_addr = saddr;
1253 	ireq->opt = tcp_v4_save_options(sk, skb);
1254 	if (!want_cookie)
1255 		TCP_ECN_create_request(req, tcp_hdr(skb));
1256 
1257 	if (want_cookie) {
1258 #ifdef CONFIG_SYN_COOKIES
1259 		syn_flood_warning(skb);
1260 		req->cookie_ts = tmp_opt.tstamp_ok;
1261 #endif
1262 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1263 	} else if (!isn) {
1264 		struct inet_peer *peer = NULL;
1265 
1266 		/* VJ's idea. We save last timestamp seen
1267 		 * from the destination in peer table, when entering
1268 		 * state TIME-WAIT, and check against it before
1269 		 * accepting new connection request.
1270 		 *
1271 		 * If "isn" is not zero, this request hit alive
1272 		 * timewait bucket, so that all the necessary checks
1273 		 * are made in the function processing timewait state.
1274 		 */
1275 		if (tmp_opt.saw_tstamp &&
1276 		    tcp_death_row.sysctl_tw_recycle &&
1277 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1278 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1279 		    peer->v4daddr == saddr) {
1280 			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1281 			    (s32)(peer->tcp_ts - req->ts_recent) >
1282 							TCP_PAWS_WINDOW) {
1283 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1284 				goto drop_and_release;
1285 			}
1286 		}
1287 		/* Kill the following clause, if you dislike this way. */
1288 		else if (!sysctl_tcp_syncookies &&
1289 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1290 			  (sysctl_max_syn_backlog >> 2)) &&
1291 			 (!peer || !peer->tcp_ts_stamp) &&
1292 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1293 			/* Without syncookies the last quarter of the
1294 			 * backlog is filled only with destinations
1295 			 * proven to be alive.
1296 			 * It means that we continue to communicate
1297 			 * with destinations already remembered
1298 			 * at the moment of the synflood.
1299 			 */
1300 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1301 				       "request from " NIPQUAD_FMT "/%u\n",
1302 				       NIPQUAD(saddr),
1303 				       ntohs(tcp_hdr(skb)->source));
1304 			goto drop_and_release;
1305 		}
1306 
1307 		isn = tcp_v4_init_sequence(skb);
1308 	}
1309 	tcp_rsk(req)->snt_isn = isn;
1310 
1311 	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1312 		goto drop_and_free;
1313 
1314 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1315 	return 0;
1316 
1317 drop_and_release:
1318 	dst_release(dst);
1319 drop_and_free:
1320 	reqsk_free(req);
1321 drop:
1322 	return 0;
1323 }
1324 
1325 
1326 /*
1327  * The three way handshake has completed - we got a valid synack -
1328  * now create the new socket.
1329  */
1330 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1331 				  struct request_sock *req,
1332 				  struct dst_entry *dst)
1333 {
1334 	struct inet_request_sock *ireq;
1335 	struct inet_sock *newinet;
1336 	struct tcp_sock *newtp;
1337 	struct sock *newsk;
1338 #ifdef CONFIG_TCP_MD5SIG
1339 	struct tcp_md5sig_key *key;
1340 #endif
1341 
1342 	if (sk_acceptq_is_full(sk))
1343 		goto exit_overflow;
1344 
1345 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1346 		goto exit;
1347 
1348 	newsk = tcp_create_openreq_child(sk, req, skb);
1349 	if (!newsk)
1350 		goto exit;
1351 
1352 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1353 	sk_setup_caps(newsk, dst);
1354 
1355 	newtp		      = tcp_sk(newsk);
1356 	newinet		      = inet_sk(newsk);
1357 	ireq		      = inet_rsk(req);
1358 	newinet->daddr	      = ireq->rmt_addr;
1359 	newinet->rcv_saddr    = ireq->loc_addr;
1360 	newinet->saddr	      = ireq->loc_addr;
1361 	newinet->opt	      = ireq->opt;
1362 	ireq->opt	      = NULL;
1363 	newinet->mc_index     = inet_iif(skb);
1364 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1365 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1366 	if (newinet->opt)
1367 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1368 	newinet->id = newtp->write_seq ^ jiffies;
1369 
1370 	tcp_mtup_init(newsk);
1371 	tcp_sync_mss(newsk, dst_mtu(dst));
1372 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1373 	tcp_initialize_rcv_mss(newsk);
1374 
1375 #ifdef CONFIG_TCP_MD5SIG
1376 	/* Copy over the MD5 key from the original socket */
1377 	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1378 		/*
1379 		 * We're using one, so create a matching key
1380 		 * on the newsk structure. If we fail to get
1381 		 * memory, then we end up not copying the key
1382 		 * across. Shucks.
1383 		 */
1384 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1385 		if (newkey != NULL)
1386 			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1387 					  newkey, key->keylen);
1388 		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1389 	}
1390 #endif
1391 
1392 	__inet_hash_nolisten(newsk);
1393 	__inet_inherit_port(sk, newsk);
1394 
1395 	return newsk;
1396 
1397 exit_overflow:
1398 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1399 exit:
1400 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1401 	dst_release(dst);
1402 	return NULL;
1403 }
1404 
1405 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1406 {
1407 	struct tcphdr *th = tcp_hdr(skb);
1408 	const struct iphdr *iph = ip_hdr(skb);
1409 	struct sock *nsk;
1410 	struct request_sock **prev;
1411 	/* Find possible connection requests. */
1412 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1413 						       iph->saddr, iph->daddr);
1414 	if (req)
1415 		return tcp_check_req(sk, skb, req, prev);
1416 
1417 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1418 			th->source, iph->daddr, th->dest, inet_iif(skb));
1419 
1420 	if (nsk) {
1421 		if (nsk->sk_state != TCP_TIME_WAIT) {
1422 			bh_lock_sock(nsk);
1423 			return nsk;
1424 		}
1425 		inet_twsk_put(inet_twsk(nsk));
1426 		return NULL;
1427 	}
1428 
1429 #ifdef CONFIG_SYN_COOKIES
1430 	if (!th->rst && !th->syn && th->ack)
1431 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1432 #endif
1433 	return sk;
1434 }
1435 
1436 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1437 {
1438 	const struct iphdr *iph = ip_hdr(skb);
1439 
1440 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1441 		if (!tcp_v4_check(skb->len, iph->saddr,
1442 				  iph->daddr, skb->csum)) {
1443 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1444 			return 0;
1445 		}
1446 	}
1447 
1448 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1449 				       skb->len, IPPROTO_TCP, 0);
1450 
1451 	if (skb->len <= 76) {
1452 		return __skb_checksum_complete(skb);
1453 	}
1454 	return 0;
1455 }
1456 
1457 
1458 /* The socket must have its spinlock held when we get
1459  * here.
1460  *
1461  * We have a potential double-lock case here, so even when
1462  * doing backlog processing we use the BH locking scheme.
1463  * This is because we cannot sleep with the original spinlock
1464  * held.
1465  */
1466 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1467 {
1468 	struct sock *rsk;
1469 #ifdef CONFIG_TCP_MD5SIG
1470 	/*
1471 	 * We really want to reject the packet as early as possible
1472 	 * if:
1473 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1474 	 *  o There is an MD5 option and we're not expecting one
1475 	 */
1476 	if (tcp_v4_inbound_md5_hash(sk, skb))
1477 		goto discard;
1478 #endif
1479 
1480 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1481 		TCP_CHECK_TIMER(sk);
1482 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1483 			rsk = sk;
1484 			goto reset;
1485 		}
1486 		TCP_CHECK_TIMER(sk);
1487 		return 0;
1488 	}
1489 
1490 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1491 		goto csum_err;
1492 
1493 	if (sk->sk_state == TCP_LISTEN) {
1494 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1495 		if (!nsk)
1496 			goto discard;
1497 
1498 		if (nsk != sk) {
1499 			if (tcp_child_process(sk, nsk, skb)) {
1500 				rsk = nsk;
1501 				goto reset;
1502 			}
1503 			return 0;
1504 		}
1505 	}
1506 
1507 	TCP_CHECK_TIMER(sk);
1508 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1509 		rsk = sk;
1510 		goto reset;
1511 	}
1512 	TCP_CHECK_TIMER(sk);
1513 	return 0;
1514 
1515 reset:
1516 	tcp_v4_send_reset(rsk, skb);
1517 discard:
1518 	kfree_skb(skb);
1519 	/* Be careful here. If this function gets more complicated and
1520 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1521 	 * might be destroyed here. This current version compiles correctly,
1522 	 * but you have been warned.
1523 	 */
1524 	return 0;
1525 
1526 csum_err:
1527 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1528 	goto discard;
1529 }
1530 
1531 /*
1532  *	From tcp_input.c
1533  */
1534 
1535 int tcp_v4_rcv(struct sk_buff *skb)
1536 {
1537 	const struct iphdr *iph;
1538 	struct tcphdr *th;
1539 	struct sock *sk;
1540 	int ret;
1541 	struct net *net = dev_net(skb->dev);
1542 
1543 	if (skb->pkt_type != PACKET_HOST)
1544 		goto discard_it;
1545 
1546 	/* Count it even if it's bad */
1547 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1548 
1549 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1550 		goto discard_it;
1551 
1552 	th = tcp_hdr(skb);
1553 
1554 	if (th->doff < sizeof(struct tcphdr) / 4)
1555 		goto bad_packet;
1556 	if (!pskb_may_pull(skb, th->doff * 4))
1557 		goto discard_it;
1558 
1559 	/* An explanation is required here, I think.
1560 	 * Packet length and doff are validated by header prediction,
1561 	 * provided the case of th->doff==0 is eliminated.
1562 	 * So, we defer the checks. */
1563 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1564 		goto bad_packet;
1565 
1566 	th = tcp_hdr(skb);
1567 	iph = ip_hdr(skb);
1568 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1569 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1570 				    skb->len - th->doff * 4);
1571 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1572 	TCP_SKB_CB(skb)->when	 = 0;
1573 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1574 	TCP_SKB_CB(skb)->sacked	 = 0;
1575 
1576 	sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
1577 			th->source, iph->daddr, th->dest, inet_iif(skb));
1578 	if (!sk)
1579 		goto no_tcp_socket;
1580 
1581 process:
1582 	if (sk->sk_state == TCP_TIME_WAIT)
1583 		goto do_time_wait;
1584 
1585 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1586 		goto discard_and_relse;
1587 	nf_reset(skb);
1588 
1589 	if (sk_filter(sk, skb))
1590 		goto discard_and_relse;
1591 
1592 	skb->dev = NULL;
1593 
1594 	bh_lock_sock_nested(sk);
1595 	ret = 0;
1596 	if (!sock_owned_by_user(sk)) {
1597 #ifdef CONFIG_NET_DMA
1598 		struct tcp_sock *tp = tcp_sk(sk);
1599 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1600 			tp->ucopy.dma_chan = get_softnet_dma();
1601 		if (tp->ucopy.dma_chan)
1602 			ret = tcp_v4_do_rcv(sk, skb);
1603 		else
1604 #endif
1605 		{
1606 			if (!tcp_prequeue(sk, skb))
1607 				ret = tcp_v4_do_rcv(sk, skb);
1608 		}
1609 	} else
1610 		sk_add_backlog(sk, skb);
1611 	bh_unlock_sock(sk);
1612 
1613 	sock_put(sk);
1614 
1615 	return ret;
1616 
1617 no_tcp_socket:
1618 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1619 		goto discard_it;
1620 
1621 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1622 bad_packet:
1623 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1624 	} else {
1625 		tcp_v4_send_reset(NULL, skb);
1626 	}
1627 
1628 discard_it:
1629 	/* Discard frame. */
1630 	kfree_skb(skb);
1631 	return 0;
1632 
1633 discard_and_relse:
1634 	sock_put(sk);
1635 	goto discard_it;
1636 
1637 do_time_wait:
1638 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1639 		inet_twsk_put(inet_twsk(sk));
1640 		goto discard_it;
1641 	}
1642 
1643 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1644 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1645 		inet_twsk_put(inet_twsk(sk));
1646 		goto discard_it;
1647 	}
1648 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1649 	case TCP_TW_SYN: {
1650 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1651 							&tcp_hashinfo,
1652 							iph->daddr, th->dest,
1653 							inet_iif(skb));
1654 		if (sk2) {
1655 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1656 			inet_twsk_put(inet_twsk(sk));
1657 			sk = sk2;
1658 			goto process;
1659 		}
1660 		/* Fall through to ACK */
1661 	}
1662 	case TCP_TW_ACK:
1663 		tcp_v4_timewait_ack(sk, skb);
1664 		break;
1665 	case TCP_TW_RST:
1666 		goto no_tcp_socket;
1667 	case TCP_TW_SUCCESS:;
1668 	}
1669 	goto discard_it;
1670 }
1671 
1672 /* VJ's idea. Save last timestamp seen from this destination
1673  * and hold it at least for normal timewait interval to use for duplicate
1674  * segment detection in subsequent connections, before they enter synchronized
1675  * state.
1676  */
1677 
1678 int tcp_v4_remember_stamp(struct sock *sk)
1679 {
1680 	struct inet_sock *inet = inet_sk(sk);
1681 	struct tcp_sock *tp = tcp_sk(sk);
1682 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1683 	struct inet_peer *peer = NULL;
1684 	int release_it = 0;
1685 
1686 	if (!rt || rt->rt_dst != inet->daddr) {
1687 		peer = inet_getpeer(inet->daddr, 1);
1688 		release_it = 1;
1689 	} else {
1690 		if (!rt->peer)
1691 			rt_bind_peer(rt, 1);
1692 		peer = rt->peer;
1693 	}
1694 
1695 	if (peer) {
1696 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1697 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1698 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1699 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1700 			peer->tcp_ts = tp->rx_opt.ts_recent;
1701 		}
1702 		if (release_it)
1703 			inet_putpeer(peer);
1704 		return 1;
1705 	}
1706 
1707 	return 0;
1708 }
1709 
1710 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1711 {
1712 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1713 
1714 	if (peer) {
1715 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1716 
1717 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1718 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1719 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1720 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1721 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1722 		}
1723 		inet_putpeer(peer);
1724 		return 1;
1725 	}
1726 
1727 	return 0;
1728 }
1729 
1730 struct inet_connection_sock_af_ops ipv4_specific = {
1731 	.queue_xmit	   = ip_queue_xmit,
1732 	.send_check	   = tcp_v4_send_check,
1733 	.rebuild_header	   = inet_sk_rebuild_header,
1734 	.conn_request	   = tcp_v4_conn_request,
1735 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1736 	.remember_stamp	   = tcp_v4_remember_stamp,
1737 	.net_header_len	   = sizeof(struct iphdr),
1738 	.setsockopt	   = ip_setsockopt,
1739 	.getsockopt	   = ip_getsockopt,
1740 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1741 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1742 	.bind_conflict	   = inet_csk_bind_conflict,
1743 #ifdef CONFIG_COMPAT
1744 	.compat_setsockopt = compat_ip_setsockopt,
1745 	.compat_getsockopt = compat_ip_getsockopt,
1746 #endif
1747 };
1748 
1749 #ifdef CONFIG_TCP_MD5SIG
1750 static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1751 	.md5_lookup		= tcp_v4_md5_lookup,
1752 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1753 	.md5_add		= tcp_v4_md5_add_func,
1754 	.md5_parse		= tcp_v4_parse_md5_keys,
1755 };
1756 #endif
1757 
1758 /* NOTE: A lot of things are set to zero explicitly by the call to
1759  *       sk_alloc(), so they need not be done here.
1760  */
1761 static int tcp_v4_init_sock(struct sock *sk)
1762 {
1763 	struct inet_connection_sock *icsk = inet_csk(sk);
1764 	struct tcp_sock *tp = tcp_sk(sk);
1765 
1766 	skb_queue_head_init(&tp->out_of_order_queue);
1767 	tcp_init_xmit_timers(sk);
1768 	tcp_prequeue_init(tp);
1769 
1770 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1771 	tp->mdev = TCP_TIMEOUT_INIT;
1772 
1773 	/* So many TCP implementations out there (incorrectly) count the
1774 	 * initial SYN frame in their delayed-ACK and congestion control
1775 	 * algorithms that we must have the following bandaid to talk
1776 	 * efficiently to them.  -DaveM
1777 	 */
1778 	tp->snd_cwnd = 2;
1779 
1780 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1781 	 * initialization of these values.
1782 	 */
1783 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
1784 	tp->snd_cwnd_clamp = ~0;
1785 	tp->mss_cache = 536;
1786 
1787 	tp->reordering = sysctl_tcp_reordering;
1788 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1789 
1790 	sk->sk_state = TCP_CLOSE;
1791 
1792 	sk->sk_write_space = sk_stream_write_space;
1793 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1794 
1795 	icsk->icsk_af_ops = &ipv4_specific;
1796 	icsk->icsk_sync_mss = tcp_sync_mss;
1797 #ifdef CONFIG_TCP_MD5SIG
1798 	tp->af_specific = &tcp_sock_ipv4_specific;
1799 #endif
1800 
1801 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1802 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1803 
1804 	atomic_inc(&tcp_sockets_allocated);
1805 
1806 	return 0;
1807 }
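/* sk_sndbuf and sk_rcvbuf start from the middle ("default") entries of
 * the net.ipv4.tcp_wmem and net.ipv4.tcp_rmem sysctl vectors; they may
 * still grow or shrink later through TCP's buffer auto-tuning.
 */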
1808 
1809 void tcp_v4_destroy_sock(struct sock *sk)
1810 {
1811 	struct tcp_sock *tp = tcp_sk(sk);
1812 
1813 	tcp_clear_xmit_timers(sk);
1814 
1815 	tcp_cleanup_congestion_control(sk);
1816 
1817 	/* Cleanup up the write buffer. */
1818 	tcp_write_queue_purge(sk);
1819 
1820 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1821 	__skb_queue_purge(&tp->out_of_order_queue);
1822 
1823 #ifdef CONFIG_TCP_MD5SIG
1824 	/* Clean up the MD5 key list, if any */
1825 	if (tp->md5sig_info) {
1826 		tcp_v4_clear_md5_list(sk);
1827 		kfree(tp->md5sig_info);
1828 		tp->md5sig_info = NULL;
1829 	}
1830 #endif
1831 
1832 #ifdef CONFIG_NET_DMA
1833 	/* Cleans up our sk_async_wait_queue */
1834 	__skb_queue_purge(&sk->sk_async_wait_queue);
1835 #endif
1836 
1837 	/* Clean prequeue, it must be empty really */
1838 	__skb_queue_purge(&tp->ucopy.prequeue);
1839 
1840 	/* Clean up a referenced TCP bind bucket. */
1841 	if (inet_csk(sk)->icsk_bind_hash)
1842 		inet_put_port(sk);
1843 
1844 	/*
1845 	 * If sendmsg cached page exists, toss it.
1846 	 */
1847 	if (sk->sk_sndmsg_page) {
1848 		__free_page(sk->sk_sndmsg_page);
1849 		sk->sk_sndmsg_page = NULL;
1850 	}
1851 
1852 	atomic_dec(&tcp_sockets_allocated);
1853 }
1854 
1855 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1856 
1857 #ifdef CONFIG_PROC_FS
1858 /* Proc filesystem TCP sock list dumping. */
1859 
1860 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1861 {
1862 	return hlist_empty(head) ? NULL :
1863 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1864 }
1865 
1866 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1867 {
1868 	return tw->tw_node.next ?
1869 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1870 }
1871 
1872 static void *listening_get_next(struct seq_file *seq, void *cur)
1873 {
1874 	struct inet_connection_sock *icsk;
1875 	struct hlist_node *node;
1876 	struct sock *sk = cur;
1877 	struct tcp_iter_state* st = seq->private;
1878 	struct net *net = seq_file_net(seq);
1879 
1880 	if (!sk) {
1881 		st->bucket = 0;
1882 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1883 		goto get_sk;
1884 	}
1885 
1886 	++st->num;
1887 
1888 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1889 		struct request_sock *req = cur;
1890 
1891 		icsk = inet_csk(st->syn_wait_sk);
1892 		req = req->dl_next;
1893 		while (1) {
1894 			while (req) {
1895 				if (req->rsk_ops->family == st->family) {
1896 					cur = req;
1897 					goto out;
1898 				}
1899 				req = req->dl_next;
1900 			}
1901 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1902 				break;
1903 get_req:
1904 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1905 		}
1906 		sk	  = sk_next(st->syn_wait_sk);
1907 		st->state = TCP_SEQ_STATE_LISTENING;
1908 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1909 	} else {
1910 		icsk = inet_csk(sk);
1911 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1912 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1913 			goto start_req;
1914 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1915 		sk = sk_next(sk);
1916 	}
1917 get_sk:
1918 	sk_for_each_from(sk, node) {
1919 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
1920 			cur = sk;
1921 			goto out;
1922 		}
1923 		icsk = inet_csk(sk);
1924 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1925 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1926 start_req:
1927 			st->uid		= sock_i_uid(sk);
1928 			st->syn_wait_sk = sk;
1929 			st->state	= TCP_SEQ_STATE_OPENREQ;
1930 			st->sbucket	= 0;
1931 			goto get_req;
1932 		}
1933 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1934 	}
1935 	if (++st->bucket < INET_LHTABLE_SIZE) {
1936 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
1937 		goto get_sk;
1938 	}
1939 	cur = NULL;
1940 out:
1941 	return cur;
1942 }
1943 
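/* Skip ahead to the *pos'th matching listening entry; NULL when exhausted. */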
1944 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1945 {
1946 	void *rc = listening_get_next(seq, NULL);
1947 
1948 	while (rc && *pos) {
1949 		rc = listening_get_next(seq, rc);
1950 		--*pos;
1951 	}
1952 	return rc;
1953 }
1954 
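/*
 * Find the first matching socket in the established hash, scanning each
 * bucket's established chain and then its time-wait chain.  On success the
 * per-bucket lock is left held; established_get_next() or tcp_seq_stop()
 * drops it.
 */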
1955 static void *established_get_first(struct seq_file *seq)
1956 {
1957 	struct tcp_iter_state *st = seq->private;
1958 	struct net *net = seq_file_net(seq);
1959 	void *rc = NULL;
1960 
1961 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
1962 		struct sock *sk;
1963 		struct hlist_node *node;
1964 		struct inet_timewait_sock *tw;
1965 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1966 
1967 		read_lock_bh(lock);
1968 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1969 			if (sk->sk_family != st->family ||
1970 			    !net_eq(sock_net(sk), net)) {
1971 				continue;
1972 			}
1973 			rc = sk;
1974 			goto out;
1975 		}
1976 		st->state = TCP_SEQ_STATE_TIME_WAIT;
1977 		inet_twsk_for_each(tw, node,
1978 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
1979 			if (tw->tw_family != st->family ||
1980 			    !net_eq(twsk_net(tw), net)) {
1981 				continue;
1982 			}
1983 			rc = tw;
1984 			goto out;
1985 		}
1986 		read_unlock_bh(lock);
1987 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1988 	}
1989 out:
1990 	return rc;
1991 }
1992 
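/*
 * Move to the next matching entry: first along the current established
 * chain, then the bucket's time-wait chain, then on to the following
 * bucket, dropping and re-taking the per-bucket lock as the walk crosses
 * bucket boundaries.
 */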
1993 static void *established_get_next(struct seq_file *seq, void *cur)
1994 {
1995 	struct sock *sk = cur;
1996 	struct inet_timewait_sock *tw;
1997 	struct hlist_node *node;
1998 	struct tcp_iter_state *st = seq->private;
1999 	struct net *net = seq_file_net(seq);
2000 
2001 	++st->num;
2002 
2003 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2004 		tw = cur;
2005 		tw = tw_next(tw);
2006 get_tw:
2007 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2008 			tw = tw_next(tw);
2009 		}
2010 		if (tw) {
2011 			cur = tw;
2012 			goto out;
2013 		}
2014 		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2015 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2016 
2017 		if (++st->bucket < tcp_hashinfo.ehash_size) {
2018 			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2019 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2020 		} else {
2021 			cur = NULL;
2022 			goto out;
2023 		}
2024 	} else
2025 		sk = sk_next(sk);
2026 
2027 	sk_for_each_from(sk, node) {
2028 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2029 			goto found;
2030 	}
2031 
2032 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2033 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2034 	goto get_tw;
2035 found:
2036 	cur = sk;
2037 out:
2038 	return cur;
2039 }
2040 
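/* Same as above for the established/time-wait tables, with pos passed by value. */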
2041 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2042 {
2043 	void *rc = established_get_first(seq);
2044 
2045 	while (rc && pos) {
2046 		rc = established_get_next(seq, rc);
2047 		--pos;
2048 	}
2049 	return rc;
2050 }
2051 
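/*
 * Position the iterator at entry @pos: listening sockets are walked first
 * (under the listen lock), then the established/time-wait tables.
 */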
2052 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2053 {
2054 	void *rc;
2055 	struct tcp_iter_state *st = seq->private;
2056 
2057 	inet_listen_lock(&tcp_hashinfo);
2058 	st->state = TCP_SEQ_STATE_LISTENING;
2059 	rc	  = listening_get_idx(seq, &pos);
2060 
2061 	if (!rc) {
2062 		inet_listen_unlock(&tcp_hashinfo);
2063 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2064 		rc	  = established_get_idx(seq, pos);
2065 	}
2066 
2067 	return rc;
2068 }
2069 
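/*
 * seq_file callbacks: ->start returns SEQ_START_TOKEN for the header line
 * or positions the iterator at *pos, ->next advances it, and ->stop drops
 * whichever lock the current iterator state still holds.
 */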
2070 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2071 {
2072 	struct tcp_iter_state *st = seq->private;
2073 	st->state = TCP_SEQ_STATE_LISTENING;
2074 	st->num = 0;
2075 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2076 }
2077 
2078 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2079 {
2080 	void *rc = NULL;
2081 	struct tcp_iter_state *st;
2082 
2083 	if (v == SEQ_START_TOKEN) {
2084 		rc = tcp_get_idx(seq, 0);
2085 		goto out;
2086 	}
2087 	st = seq->private;
2088 
2089 	switch (st->state) {
2090 	case TCP_SEQ_STATE_OPENREQ:
2091 	case TCP_SEQ_STATE_LISTENING:
2092 		rc = listening_get_next(seq, v);
2093 		if (!rc) {
2094 			inet_listen_unlock(&tcp_hashinfo);
2095 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2096 			rc	  = established_get_first(seq);
2097 		}
2098 		break;
2099 	case TCP_SEQ_STATE_ESTABLISHED:
2100 	case TCP_SEQ_STATE_TIME_WAIT:
2101 		rc = established_get_next(seq, v);
2102 		break;
2103 	}
2104 out:
2105 	++*pos;
2106 	return rc;
2107 }
2108 
2109 static void tcp_seq_stop(struct seq_file *seq, void *v)
2110 {
2111 	struct tcp_iter_state *st = seq->private;
2112 
2113 	switch (st->state) {
2114 	case TCP_SEQ_STATE_OPENREQ:
2115 		if (v) {
2116 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2117 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2118 		}
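		/* fall through */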
2119 	case TCP_SEQ_STATE_LISTENING:
2120 		if (v != SEQ_START_TOKEN)
2121 			inet_listen_unlock(&tcp_hashinfo);
2122 		break;
2123 	case TCP_SEQ_STATE_TIME_WAIT:
2124 	case TCP_SEQ_STATE_ESTABLISHED:
2125 		if (v)
2126 			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2127 		break;
2128 	}
2129 }
2130 
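/*
 * ->open for the per-family proc entry created by tcp_proc_register():
 * allocate a struct tcp_iter_state via seq_open_net() and remember which
 * address family this file should show.
 */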
2131 static int tcp_seq_open(struct inode *inode, struct file *file)
2132 {
2133 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2134 	struct tcp_iter_state *s;
2135 	int err;
2136 
2137 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2138 			  sizeof(struct tcp_iter_state));
2139 	if (err < 0)
2140 		return err;
2141 
2142 	s = ((struct seq_file *)file->private_data)->private;
2143 	s->family		= afinfo->family;
2144 	return 0;
2145 }
2146 
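/*
 * Create the per-namespace proc entry (e.g. /proc/net/tcp) for @afinfo,
 * filling in the common file and seq_ops callbacks; the caller supplies
 * only the name, the family and the ->show method.
 */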
2147 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2148 {
2149 	int rc = 0;
2150 	struct proc_dir_entry *p;
2151 
2152 	afinfo->seq_fops.open		= tcp_seq_open;
2153 	afinfo->seq_fops.read		= seq_read;
2154 	afinfo->seq_fops.llseek		= seq_lseek;
2155 	afinfo->seq_fops.release	= seq_release_net;
2156 
2157 	afinfo->seq_ops.start		= tcp_seq_start;
2158 	afinfo->seq_ops.next		= tcp_seq_next;
2159 	afinfo->seq_ops.stop		= tcp_seq_stop;
2160 
2161 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2162 			     &afinfo->seq_fops, afinfo);
2163 	if (!p)
2164 		rc = -ENOMEM;
2165 	return rc;
2166 }
2167 
2168 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2169 {
2170 	proc_net_remove(net, afinfo->name);
2171 }
2172 
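/*
 * Format one SYN_RECV open request in the /proc/net/tcp line layout; only
 * the expire timer is reported, and open requests have no inode.
 */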
2173 static void get_openreq4(struct sock *sk, struct request_sock *req,
2174 			 struct seq_file *f, int i, int uid, int *len)
2175 {
2176 	const struct inet_request_sock *ireq = inet_rsk(req);
2177 	int ttd = req->expires - jiffies;
2178 
2179 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2180 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2181 		i,
2182 		ireq->loc_addr,
2183 		ntohs(inet_sk(sk)->sport),
2184 		ireq->rmt_addr,
2185 		ntohs(ireq->rmt_port),
2186 		TCP_SYN_RECV,
2187 		0, 0, /* could print option size, but that is af dependent. */
2188 		1,    /* timers active (only the expire timer) */
2189 		jiffies_to_clock_t(ttd),
2190 		req->retrans,
2191 		uid,
2192 		0,  /* non standard timer */
2193 		0, /* open_requests have no inode */
2194 		atomic_read(&sk->sk_refcnt),
2195 		req,
2196 		len);
2197 }
2198 
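/*
 * Format one full socket.  timer_active encodes which timer is pending:
 * 1 retransmit, 2 another timer via sk_timer (e.g. keepalive), 4 zero
 * window probe, 0 none; 3 is used for TIME_WAIT sockets, see
 * get_timewait4_sock() below.
 */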
2199 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2200 {
2201 	int timer_active;
2202 	unsigned long timer_expires;
2203 	struct tcp_sock *tp = tcp_sk(sk);
2204 	const struct inet_connection_sock *icsk = inet_csk(sk);
2205 	struct inet_sock *inet = inet_sk(sk);
2206 	__be32 dest = inet->daddr;
2207 	__be32 src = inet->rcv_saddr;
2208 	__u16 destp = ntohs(inet->dport);
2209 	__u16 srcp = ntohs(inet->sport);
2210 
2211 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2212 		timer_active	= 1;
2213 		timer_expires	= icsk->icsk_timeout;
2214 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2215 		timer_active	= 4;
2216 		timer_expires	= icsk->icsk_timeout;
2217 	} else if (timer_pending(&sk->sk_timer)) {
2218 		timer_active	= 2;
2219 		timer_expires	= sk->sk_timer.expires;
2220 	} else {
2221 		timer_active	= 0;
2222 		timer_expires = jiffies;
2223 	}
2224 
2225 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2226 			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2227 		i, src, srcp, dest, destp, sk->sk_state,
2228 		tp->write_seq - tp->snd_una,
2229 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2230 					     (tp->rcv_nxt - tp->copied_seq),
2231 		timer_active,
2232 		jiffies_to_clock_t(timer_expires - jiffies),
2233 		icsk->icsk_retransmits,
2234 		sock_i_uid(sk),
2235 		icsk->icsk_probes_out,
2236 		sock_i_ino(sk),
2237 		atomic_read(&sk->sk_refcnt), sk,
2238 		jiffies_to_clock_t(icsk->icsk_rto),
2239 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2240 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2241 		tp->snd_cwnd,
2242 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
2243 		len);
2244 }
2245 
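/*
 * Format one TIME_WAIT socket; ttd is the remaining time-wait lifetime in
 * jiffies, clamped to zero.
 */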
2246 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2247 			       struct seq_file *f, int i, int *len)
2248 {
2249 	__be32 dest, src;
2250 	__u16 destp, srcp;
2251 	int ttd = tw->tw_ttd - jiffies;
2252 
2253 	if (ttd < 0)
2254 		ttd = 0;
2255 
2256 	dest  = tw->tw_daddr;
2257 	src   = tw->tw_rcv_saddr;
2258 	destp = ntohs(tw->tw_dport);
2259 	srcp  = ntohs(tw->tw_sport);
2260 
2261 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2262 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2263 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2264 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2265 		atomic_read(&tw->tw_refcnt), tw, len);
2266 }
2267 
2268 #define TMPSZ 150
2269 
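/*
 * ->show callback: print the column header for SEQ_START_TOKEN, otherwise
 * dispatch on the iterator state and pad every line to TMPSZ - 1 columns.
 */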
2270 static int tcp4_seq_show(struct seq_file *seq, void *v)
2271 {
2272 	struct tcp_iter_state *st;
2273 	int len;
2274 
2275 	if (v == SEQ_START_TOKEN) {
2276 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2277 			   "  sl  local_address rem_address   st tx_queue "
2278 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2279 			   "inode");
2280 		goto out;
2281 	}
2282 	st = seq->private;
2283 
2284 	switch (st->state) {
2285 	case TCP_SEQ_STATE_LISTENING:
2286 	case TCP_SEQ_STATE_ESTABLISHED:
2287 		get_tcp4_sock(v, seq, st->num, &len);
2288 		break;
2289 	case TCP_SEQ_STATE_OPENREQ:
2290 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2291 		break;
2292 	case TCP_SEQ_STATE_TIME_WAIT:
2293 		get_timewait4_sock(v, seq, st->num, &len);
2294 		break;
2295 	}
2296 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2297 out:
2298 	return 0;
2299 }
2300 
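/* The IPv4 instance: /proc/net/tcp, AF_INET only, rendered by tcp4_seq_show(). */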
2301 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2302 	.name		= "tcp",
2303 	.family		= AF_INET,
2304 	.seq_fops	= {
2305 		.owner		= THIS_MODULE,
2306 	},
2307 	.seq_ops	= {
2308 		.show		= tcp4_seq_show,
2309 	},
2310 };
2311 
2312 static int tcp4_proc_init_net(struct net *net)
2313 {
2314 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2315 }
2316 
2317 static void tcp4_proc_exit_net(struct net *net)
2318 {
2319 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2320 }
2321 
2322 static struct pernet_operations tcp4_net_ops = {
2323 	.init = tcp4_proc_init_net,
2324 	.exit = tcp4_proc_exit_net,
2325 };
2326 
2327 int __init tcp4_proc_init(void)
2328 {
2329 	return register_pernet_subsys(&tcp4_net_ops);
2330 }
2331 
2332 void tcp4_proc_exit(void)
2333 {
2334 	unregister_pernet_subsys(&tcp4_net_ops);
2335 }
2336 #endif /* CONFIG_PROC_FS */
2337 
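/*
 * Protocol descriptor exported to the generic AF_INET socket code; the
 * IPv4-specific entry points defined in this file (tcp_v4_init_sock,
 * tcp_v4_destroy_sock, tcp_v4_do_rcv, ...) are wired into the socket
 * layer through this table.
 */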
2338 struct proto tcp_prot = {
2339 	.name			= "TCP",
2340 	.owner			= THIS_MODULE,
2341 	.close			= tcp_close,
2342 	.connect		= tcp_v4_connect,
2343 	.disconnect		= tcp_disconnect,
2344 	.accept			= inet_csk_accept,
2345 	.ioctl			= tcp_ioctl,
2346 	.init			= tcp_v4_init_sock,
2347 	.destroy		= tcp_v4_destroy_sock,
2348 	.shutdown		= tcp_shutdown,
2349 	.setsockopt		= tcp_setsockopt,
2350 	.getsockopt		= tcp_getsockopt,
2351 	.recvmsg		= tcp_recvmsg,
2352 	.backlog_rcv		= tcp_v4_do_rcv,
2353 	.hash			= inet_hash,
2354 	.unhash			= inet_unhash,
2355 	.get_port		= inet_csk_get_port,
2356 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2357 	.sockets_allocated	= &tcp_sockets_allocated,
2358 	.orphan_count		= &tcp_orphan_count,
2359 	.memory_allocated	= &tcp_memory_allocated,
2360 	.memory_pressure	= &tcp_memory_pressure,
2361 	.sysctl_mem		= sysctl_tcp_mem,
2362 	.sysctl_wmem		= sysctl_tcp_wmem,
2363 	.sysctl_rmem		= sysctl_tcp_rmem,
2364 	.max_header		= MAX_TCP_HEADER,
2365 	.obj_size		= sizeof(struct tcp_sock),
2366 	.twsk_prot		= &tcp_timewait_sock_ops,
2367 	.rsk_prot		= &tcp_request_sock_ops,
2368 	.h.hashinfo		= &tcp_hashinfo,
2369 #ifdef CONFIG_COMPAT
2370 	.compat_setsockopt	= compat_tcp_setsockopt,
2371 	.compat_getsockopt	= compat_tcp_getsockopt,
2372 #endif
2373 };
2374 
2375 
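/*
 * Per-namespace setup: create (and on exit destroy) the kernel control
 * socket net->ipv4.tcp_sock, used for sending segments such as RSTs that
 * are not tied to a full socket.
 */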
2376 static int __net_init tcp_sk_init(struct net *net)
2377 {
2378 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2379 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2380 }
2381 
2382 static void __net_exit tcp_sk_exit(struct net *net)
2383 {
2384 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2385 }
2386 
2387 static struct pernet_operations __net_initdata tcp_sk_ops = {
2388 	.init = tcp_sk_init,
2389 	.exit = tcp_sk_exit,
2390 };
2391 
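/*
 * Register the per-namespace operations above; TCP cannot function without
 * its control socket, so failure here is fatal.
 */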
2392 void __init tcp_v4_init(void)
2393 {
2394 	if (register_pernet_device(&tcp_sk_ops))
2395 		panic("Failed to create the TCP control socket.\n");
2396 }
2397 
2398 EXPORT_SYMBOL(ipv4_specific);
2399 EXPORT_SYMBOL(tcp_hashinfo);
2400 EXPORT_SYMBOL(tcp_prot);
2401 EXPORT_SYMBOL(tcp_v4_conn_request);
2402 EXPORT_SYMBOL(tcp_v4_connect);
2403 EXPORT_SYMBOL(tcp_v4_do_rcv);
2404 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2405 EXPORT_SYMBOL(tcp_v4_send_check);
2406 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2407 
2408 #ifdef CONFIG_PROC_FS
2409 EXPORT_SYMBOL(tcp_proc_register);
2410 EXPORT_SYMBOL(tcp_proc_unregister);
2411 #endif
2412 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2413 
2414