xref: /openbmc/linux/net/ipv4/tcp_timer.c (revision 02c30a84e6298b6b20a56f0896ac80b47839e134)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 * Version:	$Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <linux/module.h>
#include <net/tcp.h>

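/* Tunable retry limits and keepalive intervals; these can be adjusted at
 * run time through the corresponding tcp_* sysctls under
 * /proc/sys/net/ipv4/.
 */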
int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 = TCP_RETR1;
int sysctl_tcp_retries2 = TCP_RETR2;
int sysctl_tcp_orphan_retries;

static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
static void tcp_keepalive_timer(unsigned long data);

#ifdef TCP_DEBUG
const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
EXPORT_SYMBOL(tcp_timer_bug_msg);
#endif

/*
 * Using different timers for retransmit, delayed acks and probes.
 * We may wish to use just one timer maintaining a list of expiry
 * jiffies to optimize.
 */

void tcp_init_xmit_timers(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	init_timer(&tp->retransmit_timer);
	tp->retransmit_timer.function = &tcp_write_timer;
	tp->retransmit_timer.data = (unsigned long)sk;
	tp->pending = 0;

	init_timer(&tp->delack_timer);
	tp->delack_timer.function = &tcp_delack_timer;
	tp->delack_timer.data = (unsigned long)sk;
	tp->ack.pending = 0;

	init_timer(&sk->sk_timer);
	sk->sk_timer.function	= &tcp_keepalive_timer;
	sk->sk_timer.data	= (unsigned long)sk;
}

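/* Stop all three transmit-side timers (retransmit, delayed ACK and
 * keepalive) and clear their pending state, e.g. when the socket is
 * being torn down.
 */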
void tcp_clear_xmit_timers(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->pending = 0;
	sk_stop_timer(sk, &tp->retransmit_timer);

	tp->ack.pending = 0;
	tp->ack.blocked = 0;
	sk_stop_timer(sk, &tp->delack_timer);

	sk_stop_timer(sk, &sk->sk_timer);
}

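/* Report a timeout (or a previously recorded soft error) to the user
 * and abort the connection.
 */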
static void tcp_write_err(struct sock *sk)
{
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);

	tcp_done(sk);
	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
}

/* Do not allow orphaned sockets to eat all our resources.
 * This is a direct violation of the TCP specs, but it is required
 * to prevent DoS attacks. It is called when a retransmission timeout
 * or zero probe timeout occurs on an orphaned socket.
 *
 * The criterion is still not confirmed experimentally and may change.
 * We kill the socket if:
 * 1. The number of orphaned sockets exceeds an administratively
 *    configured limit.
 * 2. We are under strong memory pressure.
 */
static int tcp_out_of_resources(struct sock *sk, int do_reset)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int orphans = atomic_read(&tcp_orphan_count);

	/* If the peer does not open its window for a long time, or did not
	 * transmit anything for a long time, penalize it. */
	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
		orphans <<= 1;

	/* If some dubious ICMP arrived, penalize even more. */
	if (sk->sk_err_soft)
		orphans <<= 1;

	if (orphans >= sysctl_tcp_max_orphans ||
	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
		if (net_ratelimit())
			printk(KERN_INFO "Out of socket memory\n");

		/* Catch exceptional cases, when the connection requires a reset.
		 *      1. The last segment was sent recently. */
		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
		    /*  2. The window is closed. */
		    (!tp->snd_wnd && !tp->packets_out))
			do_reset = 1;
		if (do_reset)
			tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
		return 1;
	}
	return 0;
}

/* Calculate the maximal number of retries on an orphaned socket. */
static int tcp_orphan_retries(struct sock *sk, int alive)
{
	int retries = sysctl_tcp_orphan_retries; /* May be zero. */

	/* We know from an ICMP that something is wrong. */
	if (sk->sk_err_soft && !alive)
		retries = 0;

	/* However, if the socket sent something recently, select some safe
	 * number of retries. 8 corresponds to >100 seconds with a minimal
	 * RTO of 200 msec. */
	if (retries == 0 && alive)
		retries = 8;
	return retries;
}

/* A write timeout has occurred. Process the after-effects. */
static int tcp_write_timeout(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int retry_until;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		if (tp->retransmits)
			dst_negative_advice(&sk->sk_dst_cache);
		retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
	} else {
		if (tp->retransmits >= sysctl_tcp_retries1) {
			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires PMTU black
			   hole detection. :-(

			   This is the place to implement it. It is not implemented,
			   and I do not want to implement it. It is disgusting. It
			   does not work in any case. Let me cite the same draft,
			   which requires us to implement this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages.  It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols.  The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it.  It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

                           Golden words :-).
		   */

			dst_negative_advice(&sk->sk_dst_cache);
		}

		retry_until = sysctl_tcp_retries2;
		if (sock_flag(sk, SOCK_DEAD)) {
			int alive = (tp->rto < TCP_RTO_MAX);

			retry_until = tcp_orphan_retries(sk, alive);

			if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
				return 1;
		}
	}

	if (tp->retransmits >= retry_until) {
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}
	return 0;
}

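/* Delayed-ACK timer handler: process any prequeued segments, send the
 * overdue ACK and adjust the ATO/pingpong state. If the socket is
 * locked by the user, the work is deferred and the timer rearmed.
 */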
static void tcp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct tcp_sock *tp = tcp_sk(sk);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		tp->ack.blocked = 1;
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
		sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
		goto out_unlock;
	}

	sk_stream_mem_reclaim(sk);

	if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
		goto out;

	if (time_after(tp->ack.timeout, jiffies)) {
		sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
		goto out;
	}
	tp->ack.pending &= ~TCP_ACK_TIMER;

	if (skb_queue_len(&tp->ucopy.prequeue)) {
		struct sk_buff *skb;

		NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED,
				 skb_queue_len(&tp->ucopy.prequeue));

		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk->sk_backlog_rcv(sk, skb);

		tp->ucopy.memory = 0;
	}

	if (tcp_ack_scheduled(tp)) {
		if (!tp->ack.pingpong) {
			/* Delayed ACK missed: inflate ATO. */
			tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			tp->ack.pingpong = 0;
			tp->ack.ato = TCP_ATO_MIN;
		}
		tcp_send_ack(sk);
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
	}
	TCP_CHECK_TIMER(sk);

out:
	if (tcp_memory_pressure)
		sk_stream_mem_reclaim(sk);
out_unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}

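/* Zero-window probe timer: send another window probe, or give up and
 * abort the connection once too many probes have gone unanswered.
 */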
static void tcp_probe_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;

	if (tp->packets_out || !sk->sk_send_head) {
		tp->probes_out = 0;
		return;
	}

	/* *WARNING* RFC 1122 forbids this
	 *
	 * It doesn't AFAIK, because we kill the retransmit timer -AK
	 *
	 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
	 * this behaviour in Solaris down as a bug fix. [AC]
	 *
	 * Let me explain. probes_out is zeroed by incoming ACKs
	 * even if they advertise a zero window. Hence, the connection is
	 * killed only if we received no ACKs for the normal connection
	 * timeout. It is not killed merely because the window stays zero
	 * for some time; the window may be zero until armageddon and even
	 * later. We are in full accordance with the RFCs; only the probe
	 * timer combines both the retransmission timeout and the probe
	 * timeout in one bottle.				--ANK
	 */
	max_probes = sysctl_tcp_retries2;

	if (sock_flag(sk, SOCK_DEAD)) {
		int alive = ((tp->rto << tp->backoff) < TCP_RTO_MAX);

		max_probes = tcp_orphan_retries(sk, alive);

		if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
			return;
	}

	if (tp->probes_out > max_probes) {
		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}

/*
 *	The TCP retransmit timer.
 */

static void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->packets_out)
		goto out;

	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));

	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
		/* The receiver dastardly shrinks the window. Our retransmits
		 * become zero-window probes, but we should not time out this
		 * connection. If the socket is an orphan, time it out; we
		 * cannot allow such beasts to hang around indefinitely.
		 */
#ifdef TCP_DEBUG
		if (net_ratelimit()) {
			struct inet_sock *inet = inet_sk(sk);
			printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
			       NIPQUAD(inet->daddr), htons(inet->dport),
			       inet->num, tp->snd_una, tp->snd_nxt);
		}
#endif
		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
			tcp_write_err(sk);
			goto out;
		}
		tcp_enter_loss(sk, 0);
		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
		__sk_dst_reset(sk);
		goto out_reset_timer;
	}

	if (tcp_write_timeout(sk))
		goto out;

	if (tp->retransmits == 0) {
		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
			if (tp->rx_opt.sack_ok) {
				if (tp->ca_state == TCP_CA_Recovery)
					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
				else
					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
			} else {
				if (tp->ca_state == TCP_CA_Recovery)
					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
				else
					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
			}
		} else if (tp->ca_state == TCP_CA_Loss) {
			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
		} else {
			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
		}
	}

	if (tcp_use_frto(sk)) {
		tcp_enter_frto(sk);
	} else {
		tcp_enter_loss(sk, 0);
	}

	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
		/* Retransmission failed because of local congestion,
		 * do not backoff.
		 */
		if (!tp->retransmits)
			tp->retransmits = 1;
		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
				     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
		goto out;
	}

	/* Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  NetBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */
	tp->backoff++;
	tp->retransmits++;

out_reset_timer:
	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
	if (tp->retransmits > sysctl_tcp_retries1)
		__sk_dst_reset(sk);

out:;
}

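/* Common handler for the retransmit/probe timer: dispatch to
 * tcp_retransmit_timer() or tcp_probe_timer() according to the pending
 * event, deferring the work if the socket is locked by the user.
 */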
static void tcp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct tcp_sock *tp = tcp_sk(sk);
	int event;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later */
		sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
		goto out_unlock;
	}

	if (sk->sk_state == TCP_CLOSE || !tp->pending)
		goto out;

	if (time_after(tp->timeout, jiffies)) {
		sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
		goto out;
	}

	event = tp->pending;
	tp->pending = 0;

	switch (event) {
	case TCP_TIME_RETRANS:
		tcp_retransmit_timer(sk);
		break;
	case TCP_TIME_PROBE0:
		tcp_probe_timer(sk);
		break;
	}
	TCP_CHECK_TIMER(sk);

out:
	sk_stream_mem_reclaim(sk);
out_unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/*
 *	Timer for listening sockets
 */

static void tcp_synack_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
	int thresh = max_retries;
	unsigned long now = jiffies;
	struct open_request **reqp, *req;
	int i, budget;

	if (lopt == NULL || lopt->qlen == 0)
		return;

	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to an established socket) within the first timeout.
	 * If a synack was not acknowledged for 3 seconds, it means
	 * one of the following things: the synack was lost, the ack was lost,
	 * the rtt is high or nobody planned to ack (i.e. synflood).
	 * When the server is a bit loaded, the queue is populated with old
	 * open requests, reducing the effective size of the queue.
	 * When the server is well loaded, the queue size reduces to zero
	 * after several minutes of work. This is not a synflood,
	 * it is normal operation. The solution is to prune entries that
	 * are too old, overriding the normal timeout, when the situation
	 * becomes dangerous.
	 *
	 * Essentially, we reserve half of the room for young
	 * embryos, and abort old ones without pity if they
	 * are about to clog our table.
	 */
	if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
		int young = (lopt->qlen_young << 1);

		while (thresh > 2) {
			if (lopt->qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}

	if (tp->defer_accept)
		max_retries = tp->defer_accept;

	budget = 2 * (TCP_SYNQ_HSIZE / (TCP_TIMEOUT_INIT / TCP_SYNQ_INTERVAL));
	i = lopt->clock_hand;

	do {
		reqp = &lopt->syn_table[i];
		while ((req = *reqp) != NULL) {
			if (time_after_eq(now, req->expires)) {
				if ((req->retrans < thresh ||
				     (req->acked && req->retrans < max_retries))
				    && !req->class->rtx_syn_ack(sk, req, NULL)) {
					unsigned long timeo;

					if (req->retrans++ == 0)
						lopt->qlen_young--;
					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
						    TCP_RTO_MAX);
					req->expires = now + timeo;
					reqp = &req->dl_next;
					continue;
				}

				/* Drop this request */
				write_lock(&tp->syn_wait_lock);
				*reqp = req->dl_next;
				write_unlock(&tp->syn_wait_lock);
				lopt->qlen--;
				if (req->retrans == 0)
					lopt->qlen_young--;
				tcp_openreq_free(req);
				continue;
			}
			reqp = &req->dl_next;
		}

		i = (i + 1) & (TCP_SYNQ_HSIZE - 1);

	} while (--budget > 0);

	lopt->clock_hand = i;

	if (lopt->qlen)
		tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
}

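/* Helpers for sk_timer, which is shared by the keepalive machinery and,
 * for listening sockets, the SYN-ACK retransmit queue above.
 */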
void tcp_delete_keepalive_timer(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}

void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}

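/* Arm or disarm the keepalive timer when SO_KEEPALIVE is toggled.
 * CLOSE and LISTEN sockets are left alone, since sk_timer is used
 * differently there.
 */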
void tcp_set_keepalive(struct sock *sk, int val)
{
	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
		return;

	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
		tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
	else if (!val)
		tcp_delete_keepalive_timer(sk);
}


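/* sk_timer handler. For listening sockets it drives the SYN-ACK queue,
 * for FIN_WAIT2 orphans it enforces the linger2 limit, and for
 * established connections it sends keepalive probes and aborts the
 * connection when too many probes go unanswered.
 */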
static void tcp_keepalive_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 elapsed;

	/* Only process if the socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		tcp_reset_keepalive_timer(sk, HZ/20);
		goto out;
	}

	if (sk->sk_state == TCP_LISTEN) {
		tcp_synack_timer(sk);
		goto out;
	}

	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		if (tp->linger2 >= 0) {
			int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || sk->sk_send_head)
		goto resched;

	elapsed = tcp_time_stamp - tp->rcv_tstamp;

	if (elapsed >= keepalive_time_when(tp)) {
		if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
		     (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk) <= 0) {
			tp->probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If the keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	TCP_CHECK_TIMER(sk);
	sk_stream_mem_reclaim(sk);

resched:
	tcp_reset_keepalive_timer(sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}


EXPORT_SYMBOL(tcp_clear_xmit_timers);
EXPORT_SYMBOL(tcp_delete_keepalive_timer);
EXPORT_SYMBOL(tcp_init_xmit_timers);
EXPORT_SYMBOL(tcp_reset_keepalive_timer);