1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21 
22 #include <net/tcp.h>
23 
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_conntrack_timeout.h>
33 #include <net/netfilter/nf_log.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36 
37 /* "Be conservative in what you do,
38     be liberal in what you accept from others."
39     If it's non-zero, we mark only out of window RST segments as INVALID. */
40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
41 
42 /* If it is set to zero, we disable picking up already established
43    connections. */
44 static int nf_ct_tcp_loose __read_mostly = 1;
45 
46 /* Max number of the retransmitted packets without receiving an (acceptable)
47    ACK from the destination. If this number is reached, a shorter timer
48    will be started. */
49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
50 
51   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
52      closely.  They're more complex. --RR */
53 
/* Human-readable state names for /proc output (tcp_print_conntrack).
 * Order must match enum tcp_conntrack. */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"SYN_SENT2",
};
66 
/* Postfix time-unit macros: "2 MINS" expands to 2 * 60 * HZ, i.e. a
 * jiffies count. Used only for the timeout table below. */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

/* Default per-state conntrack timeouts, in jiffies, indexed by
 * enum tcp_conntrack. */
static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
	[TCP_CONNTRACK_UNACK]		= 5 MINS,
};
88 
/* Two-letter shorthands for the conntrack states, used to keep the
 * transition table below readable. sIV (== TCP_CONNTRACK_MAX) marks an
 * invalid transition; sIG marks a packet to be ignored. */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE

/* What TCP flags are set from RST/SYN/FIN/ACK. */
/* Classification of a segment by its flag bits (see
 * get_conntrack_index()); order must match the middle index of the
 * tcp_conntracks[][][] table below. */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
111 
112 /*
113  * The TCP state transition table needs a few words...
114  *
115  * We are the man in the middle. All the packets go through us
116  * but might get lost in transit to the destination.
117  * It is assumed that the destinations can't receive segments
118  * we haven't seen.
119  *
120  * The checked segment is in window, but our windows are *not*
121  * equivalent with the ones of the sender/receiver. We always
122  * try to guess the state of the current sender.
123  *
 * The meanings of the states are:
125  *
126  * NONE:	initial state
127  * SYN_SENT:	SYN-only packet seen
128  * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
129  * SYN_RECV:	SYN-ACK packet seen
130  * ESTABLISHED:	ACK packet seen
131  * FIN_WAIT:	FIN packet seen
132  * CLOSE_WAIT:	ACK seen (after FIN)
133  * LAST_ACK:	FIN seen (after FIN)
134  * TIME_WAIT:	last ACK seen
135  * CLOSE:	closed connection (RST)
136  *
137  * Packets marked as IGNORED (sIG):
138  *	if they may be either invalid or valid
139  *	and the receiver may send back a connection
140  *	closing RST or a SYN/ACK.
141  *
142  * Packets marked as INVALID (sIV):
143  *	if we regard them as truly invalid packets
144  */
/* State transition table, indexed as
 * tcp_conntracks[direction][tcp_bit_set][current state] -> next state. */
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
 *	sS2 -> sS2	Late retransmitted SYN
 *	sSR -> sIG
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
 *	sNO -> sIV	Too late and no reason to do anything
 *	sSS -> sIV	Client can't send SYN and then SYN/ACK
 *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
 *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
 *	sES -> sIV	Invalid SYN/ACK packets sent by the client
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client might not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
 *			the last ACK.
 *			Might be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
 *	sS2 -> sIV
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*
 *	sNO -> sIV	RST without any connection is invalid; in every
 *			other state the connection is torn down to CLOSE.
 */
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
/* A segment with none of SYN/FIN/RST/ACK set is never valid. */
	},
	{
/* REPLY */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
 *	sNO -> sIV	Never reached.
 *	sSS -> sS2	Simultaneous open
 *	sS2 -> sS2	Retransmitted simultaneous SYN
 *	sSR -> sIV	Invalid SYN packets sent by the server
 *	sES -> sIV
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sSS	Reopened connection, but server may have switched role
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *	sSS -> sSR	Standard open.
 *	sS2 -> sSR	Simultaneous open
 *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
 *	sSS -> sIG	Might be a half-open connection.
 *	sS2 -> sIG
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
274 
275 #ifdef CONFIG_NF_CONNTRACK_PROCFS
276 /* Print out the private part of the conntrack. */
277 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
278 {
279 	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
280 		return;
281 
282 	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
283 }
284 #endif
285 
286 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
287 {
288 	if (tcph->rst) return TCP_RST_SET;
289 	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
290 	else if (tcph->fin) return TCP_FIN_SET;
291 	else if (tcph->ack) return TCP_ACK_SET;
292 	else return TCP_NONE_SET;
293 }
294 
295 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
296    in IP Filter' by Guido van Rooij.
297 
298    http://www.sane.nl/events/sane2000/papers.html
299    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
300 
301    The boundaries and the conditions are changed according to RFC793:
302    the packet must intersect the window (i.e. segments may be
303    after the right or before the left edge) and thus receivers may ACK
304    segments after the right edge of the window.
305 
306 	td_maxend = max(sack + max(win,1)) seen in reply packets
307 	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
308 	td_maxwin += seq + len - sender.td_maxend
309 			if seq + len > sender.td_maxend
310 	td_end    = max(seq + len) seen in sent packets
311 
312    I.   Upper bound for valid data:	seq <= sender.td_maxend
313    II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
314    III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
315    IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
316 
317    where sack is the highest right edge of sack block found in the packet
318    or ack in the case of packet without SACK option.
319 
   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
322 */
323 
/* Return the sequence number just past this segment: seq plus the TCP
 * payload length, counting SYN and FIN as one octet of sequence space
 * each (RFC 793). */
static inline __u32 segment_seq_plus_len(__u32 seq,
					 size_t len,
					 unsigned int dataoff,
					 const struct tcphdr *tcph)
{
	/* XXX Should I use payload length field in IP/IPv6 header ?
	 * - YK */
	size_t payload_len = len - dataoff - tcph->doff * 4;
	__u32 flag_space = (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0);

	return seq + payload_len + flag_space;
}
334 
/* Fixme: what about big packets? */
/* Slack allowed below receiver->td_end for a valid (s)ack (bound IV
 * above): the sender's max window, but never less than 66000. */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
340 
341 /*
342  * Simplified tcp_parse_options routine from tcp_input.c
343  */
344 static void tcp_options(const struct sk_buff *skb,
345 			unsigned int dataoff,
346 			const struct tcphdr *tcph,
347 			struct ip_ct_tcp_state *state)
348 {
349 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
350 	const unsigned char *ptr;
351 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
352 
353 	if (!length)
354 		return;
355 
356 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
357 				 length, buff);
358 	BUG_ON(ptr == NULL);
359 
360 	state->td_scale =
361 	state->flags = 0;
362 
363 	while (length > 0) {
364 		int opcode=*ptr++;
365 		int opsize;
366 
367 		switch (opcode) {
368 		case TCPOPT_EOL:
369 			return;
370 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
371 			length--;
372 			continue;
373 		default:
374 			if (length < 2)
375 				return;
376 			opsize=*ptr++;
377 			if (opsize < 2) /* "silly options" */
378 				return;
379 			if (opsize > length)
380 				return;	/* don't parse partial options */
381 
382 			if (opcode == TCPOPT_SACK_PERM
383 			    && opsize == TCPOLEN_SACK_PERM)
384 				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
385 			else if (opcode == TCPOPT_WINDOW
386 				 && opsize == TCPOLEN_WINDOW) {
387 				state->td_scale = *(u_int8_t *)ptr;
388 
389 				if (state->td_scale > TCP_MAX_WSCALE)
390 					state->td_scale = TCP_MAX_WSCALE;
391 
392 				state->flags |=
393 					IP_CT_TCP_FLAG_WINDOW_SCALE;
394 			}
395 			ptr += opsize - 2;
396 			length -= opsize;
397 		}
398 	}
399 }
400 
/* Scan the TCP options for a SACK option and raise *sack to the highest
 * right edge found among its blocks.  *sack must be pre-initialized by
 * the caller (to the plain ack value); it is only ever moved forward. */
static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
                     const struct tcphdr *tcph, __u32 *sack)
{
	/* Scratch space for the maximal 40-byte option area. */
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	const unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);
	__u32 tmp;

	if (!length)
		return;

	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
				 length, buff);
	/* doff was validated by tcp_error() before we get here. */
	BUG_ON(ptr == NULL);

	/* Fast path for timestamp-only option */
	if (length == TCPOLEN_TSTAMP_ALIGNED
	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
				       | (TCPOPT_NOP << 16)
				       | (TCPOPT_TIMESTAMP << 8)
				       | TCPOLEN_TIMESTAMP))
		return;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize, i;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */

			/* A SACK option must hold at least one block and a
			 * whole number of 8-byte blocks. */
			if (opcode == TCPOPT_SACK
			    && opsize >= (TCPOLEN_SACK_BASE
					  + TCPOLEN_SACK_PERBLOCK)
			    && !((opsize - TCPOLEN_SACK_BASE)
				 % TCPOLEN_SACK_PERBLOCK)) {
				for (i = 0;
				     i < (opsize - TCPOLEN_SACK_BASE);
				     i += TCPOLEN_SACK_PERBLOCK) {
					/* Second u32 of each block = right edge. */
					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);

					if (after(tmp, *sack))
						*sack = tmp;
				}
				return;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
463 
/* Core of the window tracking algorithm (bounds I-IV described above).
 * Checks that the segment lies within the windows tracked for both
 * directions and, when acceptable, updates the per-direction tracking
 * data (td_end/td_maxend/td_maxwin/td_maxack, flags) and the
 * retransmission counter.
 *
 * @ct:      conntrack entry the segment belongs to
 * @state:   TCP tracking state (seen[] per direction); caller holds ct->lock
 * @dir:     direction the segment travels in
 * @index:   enum tcp_bit_set classification of the segment
 * @skb:     the segment
 * @dataoff: offset of the TCP header in @skb
 * @tcph:    the TCP header
 *
 * Returns true if the segment is acceptable (in window, or liberal mode
 * is on); false means the caller should treat it as invalid.
 */
static bool tcp_in_window(const struct nf_conn *ct,
			  struct ip_ct_tcp *state,
			  enum ip_conntrack_dir dir,
			  unsigned int index,
			  const struct sk_buff *skb,
			  unsigned int dataoff,
			  const struct tcphdr *tcph)
{
	struct net *net = nf_ct_net(ct);
	struct nf_tcp_net *tn = nf_tcp_pernet(net);
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
	__u32 seq, ack, sack, end, win, swin;
	s32 receiver_offset;
	bool res, in_recv_win;

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win = ntohs(tcph->window);
	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

	/* sack becomes the highest SACK right edge, or stays == ack. */
	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, dataoff, tcph, &sack);

	/* Take into account NAT sequence number mangling */
	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
	ack -= receiver_offset;
	sack -= receiver_offset;

	pr_debug("tcp_in_window: START\n");
	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	/* td_maxwin == 0 means no packet seen from this side yet. */
	if (sender->td_maxwin == 0) {
		/*
		 * Initialize sender data.
		 */
		if (tcph->syn) {
			/*
			 * SYN-ACK in reply to a SYN
			 * or SYN from reply direction in simultaneous open.
			 */
			sender->td_end =
			sender->td_maxend = end;
			sender->td_maxwin = (win == 0 ? 1 : win);

			tcp_options(skb, dataoff, tcph, sender);
			/*
			 * RFC 1323:
			 * Both sides must send the Window Scale option
			 * to enable window scaling in either direction.
			 */
			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
				sender->td_scale =
				receiver->td_scale = 0;
			if (!tcph->ack)
				/* Simultaneous open */
				return true;
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
			 */
			sender->td_end = end;
			swin = win << sender->td_scale;
			sender->td_maxwin = (swin == 0 ? 1 : swin);
			sender->td_maxend = end + sender->td_maxwin;
			/*
			 * We haven't seen traffic in the other direction yet
			 * but we have to tweak window tracking to pass III
			 * and IV until that happens.
			 */
			if (receiver->td_maxwin == 0)
				receiver->td_end = receiver->td_maxend = sack;
		}
	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
		     && dir == IP_CT_DIR_ORIGINAL)
		   || (state->state == TCP_CONNTRACK_SYN_RECV
		     && dir == IP_CT_DIR_REPLY))
		   && after(end, sender->td_end)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
		 * not wait at all; it must only be sure to use sequence
		 * numbers larger than those recently used."
		 */
		sender->td_end =
		sender->td_maxend = end;
		sender->td_maxwin = (win == 0 ? 1 : win);

		tcp_options(skb, dataoff, tcph, sender);
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
		/*
		 * RST sent answering SYN.
		 */
		seq = end = sender->td_end;

	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	/* Is the ending sequence in the receive window (if available)? */
	in_recv_win = !receiver->td_maxwin ||
		      after(end, sender->td_end - receiver->td_maxwin - 1);

	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
		 before(seq, sender->td_maxend + 1),
		 (in_recv_win ? 1 : 0),
		 before(sack, receiver->td_end + 1),
		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));

	/* Bounds I-IV from the algorithm description above. */
	if (before(seq, sender->td_maxend + 1) &&
	    in_recv_win &&
	    before(sack, receiver->td_end + 1) &&
	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
		/*
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
		if (after(end, sender->td_end)) {
			sender->td_end = end;
			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
		}
		if (tcph->ack) {
			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
				sender->td_maxack = ack;
				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
			} else if (after(ack, sender->td_maxack))
				sender->td_maxack = ack;
		}

		/*
		 * Update receiver data.
		 */
		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}
		if (ack == receiver->td_end)
			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

		/*
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
			    && state->last_end == end
			    && state->last_win == win)
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
				state->last_win = win;
				state->retrans = 0;
			}
		}
		res = true;
	} else {
		/* Out of window: accept anyway in liberal mode, otherwise
		 * log which of the four bounds failed. */
		res = false;
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
		    tn->tcp_be_liberal)
			res = true;
		if (!res) {
			nf_ct_l4proto_log_invalid(skb, ct,
			"%s",
			before(seq, sender->td_maxend + 1) ?
			in_recv_win ?
			before(sack, receiver->td_end + 1) ?
			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
		}
	}

	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
		 "receiver end=%u maxend=%u maxwin=%u\n",
		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);

	return res;
}
699 
/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
/* Indexed by the flag byte with PSH/ECE/CWR masked off (see tcp_error());
 * a zero entry marks the combination as invalid. */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
{
	[TCPHDR_SYN]				= 1,
	[TCPHDR_SYN|TCPHDR_URG]			= 1,
	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
	[TCPHDR_RST]				= 1,
	[TCPHDR_RST|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
	[TCPHDR_ACK]				= 1,
	[TCPHDR_ACK|TCPHDR_URG]			= 1,
};
714 
/* Convenience wrapper: log an invalid TCP packet with the given reason. */
static void tcp_error_log(const struct sk_buff *skb,
			  const struct nf_hook_state *state,
			  const char *msg)
{
	nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
}
721 
/* Protect conntrack against broken packets. Code taken from ipt_unclean.c.
 * Returns true when the packet is malformed (truncated header, bad
 * checksum on PRE_ROUTING, or an invalid flag combination) and must be
 * treated as invalid by the caller; false when it is sane. */
static bool tcp_error(const struct tcphdr *th,
		      struct sk_buff *skb,
		      unsigned int dataoff,
		      const struct nf_hook_state *state)
{
	unsigned int tcplen = skb->len - dataoff;
	u8 tcpflags;

	/* Not whole TCP header or malformed packet */
	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
		tcp_error_log(skb, state, "truncated packet");
		return true;
	}

	/* Checksum invalid? Ignore.
	 * We skip checking packets on the outgoing path
	 * because the checksum is assumed to be correct.
	 */
	/* FIXME: Source route IP option packets --RR */
	if (state->net->ct.sysctl_checksum &&
	    state->hook == NF_INET_PRE_ROUTING &&
	    nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
		tcp_error_log(skb, state, "bad checksum");
		return true;
	}

	/* Check TCP flags. */
	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
	if (!tcp_valid_flags[tcpflags]) {
		tcp_error_log(skb, state, "invalid tcp flag combination");
		return true;
	}

	return false;
}
758 
/* Initialize TCP tracking state for a freshly-created (unconfirmed)
 * conntrack from its first packet.  Returns false when the conntrack
 * should be dropped: the first packet maps to an invalid state, or it
 * is a mid-stream pickup while tcp_loose is disabled. */
static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
			     unsigned int dataoff,
			     const struct tcphdr *th)
{
	enum tcp_conntrack new_state;
	struct net *net = nf_ct_net(ct);
	const struct nf_tcp_net *tn = nf_tcp_pernet(net);
	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];

	/* Don't need lock here: this conntrack not in circulation yet */
	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];

	/* Invalid: delete conntrack */
	if (new_state >= TCP_CONNTRACK_MAX) {
		pr_debug("nf_ct_tcp: invalid new deleting.\n");
		return false;
	}

	if (new_state == TCP_CONNTRACK_SYN_SENT) {
		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
		/* SYN packet */
		ct->proto.tcp.seen[0].td_end =
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     dataoff, th);
		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
		/* td_maxwin == 0 is the "uninitialized" marker; avoid it. */
		if (ct->proto.tcp.seen[0].td_maxwin == 0)
			ct->proto.tcp.seen[0].td_maxwin = 1;
		ct->proto.tcp.seen[0].td_maxend =
			ct->proto.tcp.seen[0].td_end;

		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
	} else if (tn->tcp_loose == 0) {
		/* Don't try to pick up connections. */
		return false;
	} else {
		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
		/*
		 * We are in the middle of a connection,
		 * its history is lost for us.
		 * Let's try to use the data from the packet.
		 */
		ct->proto.tcp.seen[0].td_end =
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     dataoff, th);
		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
		if (ct->proto.tcp.seen[0].td_maxwin == 0)
			ct->proto.tcp.seen[0].td_maxwin = 1;
		ct->proto.tcp.seen[0].td_maxend =
			ct->proto.tcp.seen[0].td_end +
			ct->proto.tcp.seen[0].td_maxwin;

		/* We assume SACK and liberal window checking to handle
		 * window scaling */
		ct->proto.tcp.seen[0].flags =
		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
					      IP_CT_TCP_FLAG_BE_LIBERAL;
	}

	/* tcp_packet will set them */
	ct->proto.tcp.last_index = TCP_NONE_SET;

	pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 __func__,
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);
	return true;
}
830 
831 /* Returns verdict for packet, or -1 for invalid. */
832 static int tcp_packet(struct nf_conn *ct,
833 		      struct sk_buff *skb,
834 		      unsigned int dataoff,
835 		      enum ip_conntrack_info ctinfo,
836 		      const struct nf_hook_state *state)
837 {
838 	struct net *net = nf_ct_net(ct);
839 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
840 	struct nf_conntrack_tuple *tuple;
841 	enum tcp_conntrack new_state, old_state;
842 	unsigned int index, *timeouts;
843 	enum ip_conntrack_dir dir;
844 	const struct tcphdr *th;
845 	struct tcphdr _tcph;
846 	unsigned long timeout;
847 
848 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
849 	if (th == NULL)
850 		return -NF_ACCEPT;
851 
852 	if (tcp_error(th, skb, dataoff, state))
853 		return -NF_ACCEPT;
854 
855 	if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
856 		return -NF_ACCEPT;
857 
858 	spin_lock_bh(&ct->lock);
859 	old_state = ct->proto.tcp.state;
860 	dir = CTINFO2DIR(ctinfo);
861 	index = get_conntrack_index(th);
862 	new_state = tcp_conntracks[dir][index][old_state];
863 	tuple = &ct->tuplehash[dir].tuple;
864 
865 	switch (new_state) {
866 	case TCP_CONNTRACK_SYN_SENT:
867 		if (old_state < TCP_CONNTRACK_TIME_WAIT)
868 			break;
869 		/* RFC 1122: "When a connection is closed actively,
870 		 * it MUST linger in TIME-WAIT state for a time 2xMSL
871 		 * (Maximum Segment Lifetime). However, it MAY accept
872 		 * a new SYN from the remote TCP to reopen the connection
873 		 * directly from TIME-WAIT state, if..."
874 		 * We ignore the conditions because we are in the
875 		 * TIME-WAIT state anyway.
876 		 *
877 		 * Handle aborted connections: we and the server
878 		 * think there is an existing connection but the client
879 		 * aborts it and starts a new one.
880 		 */
881 		if (((ct->proto.tcp.seen[dir].flags
882 		      | ct->proto.tcp.seen[!dir].flags)
883 		     & IP_CT_TCP_FLAG_CLOSE_INIT)
884 		    || (ct->proto.tcp.last_dir == dir
885 		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
886 			/* Attempt to reopen a closed/aborted connection.
887 			 * Delete this connection and look up again. */
888 			spin_unlock_bh(&ct->lock);
889 
890 			/* Only repeat if we can actually remove the timer.
891 			 * Destruction may already be in progress in process
892 			 * context and we must give it a chance to terminate.
893 			 */
894 			if (nf_ct_kill(ct))
895 				return -NF_REPEAT;
896 			return NF_DROP;
897 		}
898 		/* Fall through */
899 	case TCP_CONNTRACK_IGNORE:
900 		/* Ignored packets:
901 		 *
902 		 * Our connection entry may be out of sync, so ignore
903 		 * packets which may signal the real connection between
904 		 * the client and the server.
905 		 *
906 		 * a) SYN in ORIGINAL
907 		 * b) SYN/ACK in REPLY
908 		 * c) ACK in reply direction after initial SYN in original.
909 		 *
910 		 * If the ignored packet is invalid, the receiver will send
911 		 * a RST we'll catch below.
912 		 */
913 		if (index == TCP_SYNACK_SET
914 		    && ct->proto.tcp.last_index == TCP_SYN_SET
915 		    && ct->proto.tcp.last_dir != dir
916 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
917 			/* b) This SYN/ACK acknowledges a SYN that we earlier
918 			 * ignored as invalid. This means that the client and
919 			 * the server are both in sync, while the firewall is
920 			 * not. We get in sync from the previously annotated
921 			 * values.
922 			 */
923 			old_state = TCP_CONNTRACK_SYN_SENT;
924 			new_state = TCP_CONNTRACK_SYN_RECV;
925 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
926 				ct->proto.tcp.last_end;
927 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
928 				ct->proto.tcp.last_end;
929 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
930 				ct->proto.tcp.last_win == 0 ?
931 					1 : ct->proto.tcp.last_win;
932 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
933 				ct->proto.tcp.last_wscale;
934 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
935 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
936 				ct->proto.tcp.last_flags;
937 			memset(&ct->proto.tcp.seen[dir], 0,
938 			       sizeof(struct ip_ct_tcp_state));
939 			break;
940 		}
941 		ct->proto.tcp.last_index = index;
942 		ct->proto.tcp.last_dir = dir;
943 		ct->proto.tcp.last_seq = ntohl(th->seq);
944 		ct->proto.tcp.last_end =
945 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
946 		ct->proto.tcp.last_win = ntohs(th->window);
947 
948 		/* a) This is a SYN in ORIGINAL. The client and the server
949 		 * may be in sync but we are not. In that case, we annotate
950 		 * the TCP options and let the packet go through. If it is a
951 		 * valid SYN packet, the server will reply with a SYN/ACK, and
952 		 * then we'll get in sync. Otherwise, the server potentially
953 		 * responds with a challenge ACK if implementing RFC5961.
954 		 */
955 		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
956 			struct ip_ct_tcp_state seen = {};
957 
958 			ct->proto.tcp.last_flags =
959 			ct->proto.tcp.last_wscale = 0;
960 			tcp_options(skb, dataoff, th, &seen);
961 			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
962 				ct->proto.tcp.last_flags |=
963 					IP_CT_TCP_FLAG_WINDOW_SCALE;
964 				ct->proto.tcp.last_wscale = seen.td_scale;
965 			}
966 			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
967 				ct->proto.tcp.last_flags |=
968 					IP_CT_TCP_FLAG_SACK_PERM;
969 			}
970 			/* Mark the potential for RFC5961 challenge ACK,
971 			 * this pose a special problem for LAST_ACK state
972 			 * as ACK is intrepretated as ACKing last FIN.
973 			 */
974 			if (old_state == TCP_CONNTRACK_LAST_ACK)
975 				ct->proto.tcp.last_flags |=
976 					IP_CT_EXP_CHALLENGE_ACK;
977 		}
978 		spin_unlock_bh(&ct->lock);
979 		nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
980 					  "state %s ", tcp_conntrack_names[old_state]);
981 		return NF_ACCEPT;
982 	case TCP_CONNTRACK_MAX:
983 		/* Special case for SYN proxy: when the SYN to the server or
984 		 * the SYN/ACK from the server is lost, the client may transmit
985 		 * a keep-alive packet while in SYN_SENT state. This needs to
986 		 * be associated with the original conntrack entry in order to
987 		 * generate a new SYN with the correct sequence number.
988 		 */
989 		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
990 		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
991 		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
992 		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
993 			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
994 			spin_unlock_bh(&ct->lock);
995 			return NF_ACCEPT;
996 		}
997 
998 		/* Invalid packet */
999 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
1000 			 dir, get_conntrack_index(th), old_state);
1001 		spin_unlock_bh(&ct->lock);
1002 		nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
1003 		return -NF_ACCEPT;
1004 	case TCP_CONNTRACK_TIME_WAIT:
1005 		/* RFC5961 compliance cause stack to send "challenge-ACK"
1006 		 * e.g. in response to spurious SYNs.  Conntrack MUST
1007 		 * not believe this ACK is acking last FIN.
1008 		 */
1009 		if (old_state == TCP_CONNTRACK_LAST_ACK &&
1010 		    index == TCP_ACK_SET &&
1011 		    ct->proto.tcp.last_dir != dir &&
1012 		    ct->proto.tcp.last_index == TCP_SYN_SET &&
1013 		    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
1014 			/* Detected RFC5961 challenge ACK */
1015 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
1016 			spin_unlock_bh(&ct->lock);
1017 			nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
1018 			return NF_ACCEPT; /* Don't change state */
1019 		}
1020 		break;
1021 	case TCP_CONNTRACK_SYN_SENT2:
1022 		/* tcp_conntracks table is not smart enough to handle
1023 		 * simultaneous open.
1024 		 */
1025 		ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
1026 		break;
1027 	case TCP_CONNTRACK_SYN_RECV:
1028 		if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
1029 		    ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
1030 			new_state = TCP_CONNTRACK_ESTABLISHED;
1031 		break;
1032 	case TCP_CONNTRACK_CLOSE:
1033 		if (index == TCP_RST_SET
1034 		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
1035 		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
1036 			/* Invalid RST  */
1037 			spin_unlock_bh(&ct->lock);
1038 			nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
1039 			return -NF_ACCEPT;
1040 		}
1041 		if (index == TCP_RST_SET
1042 		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1043 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
1044 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
1045 			    && ct->proto.tcp.last_index == TCP_ACK_SET))
1046 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1047 			/* RST sent to invalid SYN or ACK we had let through
1048 			 * at a) and c) above:
1049 			 *
1050 			 * a) SYN was in window then
1051 			 * c) we hold a half-open connection.
1052 			 *
1053 			 * Delete our connection entry.
1054 			 * We skip window checking, because packet might ACK
1055 			 * segments we ignored. */
1056 			goto in_window;
1057 		}
1058 		/* Just fall through */
1059 	default:
1060 		/* Keep compilers happy. */
1061 		break;
1062 	}
1063 
1064 	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1065 			   skb, dataoff, th)) {
1066 		spin_unlock_bh(&ct->lock);
1067 		return -NF_ACCEPT;
1068 	}
1069      in_window:
1070 	/* From now on we have got in-window packets */
1071 	ct->proto.tcp.last_index = index;
1072 	ct->proto.tcp.last_dir = dir;
1073 
1074 	pr_debug("tcp_conntracks: ");
1075 	nf_ct_dump_tuple(tuple);
1076 	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1077 		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1078 		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1079 		 old_state, new_state);
1080 
1081 	ct->proto.tcp.state = new_state;
1082 	if (old_state != new_state
1083 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1084 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1085 
1086 	timeouts = nf_ct_timeout_lookup(ct);
1087 	if (!timeouts)
1088 		timeouts = tn->timeouts;
1089 
1090 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1091 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1092 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1093 	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1094 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1095 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1096 		timeout = timeouts[TCP_CONNTRACK_UNACK];
1097 	else if (ct->proto.tcp.last_win == 0 &&
1098 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1099 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1100 	else
1101 		timeout = timeouts[new_state];
1102 	spin_unlock_bh(&ct->lock);
1103 
1104 	if (new_state != old_state)
1105 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1106 
1107 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1108 		/* If only reply is a RST, we can consider ourselves not to
1109 		   have an established connection: this is a fairly common
1110 		   problem case, so we can delete the conntrack
1111 		   immediately.  --RR */
1112 		if (th->rst) {
1113 			nf_ct_kill_acct(ct, ctinfo, skb);
1114 			return NF_ACCEPT;
1115 		}
1116 		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1117 		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1118 		 */
1119 		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1120 		    timeout > timeouts[TCP_CONNTRACK_UNACK])
1121 			timeout = timeouts[TCP_CONNTRACK_UNACK];
1122 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1123 		   && (old_state == TCP_CONNTRACK_SYN_RECV
1124 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1125 		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1126 		/* Set ASSURED if we see see valid ack in ESTABLISHED
1127 		   after SYN_RECV or a valid answer for a picked up
1128 		   connection. */
1129 		set_bit(IPS_ASSURED_BIT, &ct->status);
1130 		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1131 	}
1132 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1133 
1134 	return NF_ACCEPT;
1135 }
1136 
1137 static bool tcp_can_early_drop(const struct nf_conn *ct)
1138 {
1139 	switch (ct->proto.tcp.state) {
1140 	case TCP_CONNTRACK_FIN_WAIT:
1141 	case TCP_CONNTRACK_LAST_ACK:
1142 	case TCP_CONNTRACK_TIME_WAIT:
1143 	case TCP_CONNTRACK_CLOSE:
1144 	case TCP_CONNTRACK_CLOSE_WAIT:
1145 		return true;
1146 	default:
1147 		break;
1148 	}
1149 
1150 	return false;
1151 }
1152 
1153 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1154 
1155 #include <linux/netfilter/nfnetlink.h>
1156 #include <linux/netfilter/nfnetlink_conntrack.h>
1157 
/* Export this conntrack's private TCP state as a nested
 * CTA_PROTOINFO_TCP netlink attribute (state, per-direction window
 * scale factors and per-direction flag sets).
 *
 * Returns 0 on success, -1 if the skb ran out of tailroom.
 * ct->lock is held across all reads so the snapshot is consistent;
 * it is dropped before nla_nest_end(), which only touches the skb.
 */
static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
			 struct nf_conn *ct)
{
	struct nlattr *nest_parms;
	struct nf_ct_tcp_flags tmp = {};

	spin_lock_bh(&ct->lock);
	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
	if (!nest_parms)
		goto nla_put_failure;

	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
		       ct->proto.tcp.seen[0].td_scale) ||
	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
		       ct->proto.tcp.seen[1].td_scale))
		goto nla_put_failure;

	/* Flags are wrapped in struct nf_ct_tcp_flags (flags + mask);
	 * only the flags half is meaningful on dump, mask stays zero. */
	tmp.flags = ct->proto.tcp.seen[0].flags;
	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
		    sizeof(struct nf_ct_tcp_flags), &tmp))
		goto nla_put_failure;

	tmp.flags = ct->proto.tcp.seen[1].flags;
	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
		    sizeof(struct nf_ct_tcp_flags), &tmp))
		goto nla_put_failure;
	spin_unlock_bh(&ct->lock);

	nla_nest_end(skb, nest_parms);

	return 0;

nla_put_failure:
	spin_unlock_bh(&ct->lock);
	return -1;
}
1195 
/* Validation policy for the nested CTA_PROTOINFO_TCP attributes
 * accepted by nlattr_to_tcp(). */
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
};
1203 
/* Space needed for the attributes emitted by tcp_to_nlattr(): three
 * u8 attributes (STATE, WSCALE_ORIGINAL, WSCALE_REPLY) and two
 * nf_ct_tcp_flags blobs (FLAGS_ORIGINAL, FLAGS_REPLY).  The previous
 * definition only reserved two u8 terms although tcp_to_nlattr()
 * emits three; add the missing term so the size estimate is not
 * short.
 * NOTE(review): the CTA_PROTOINFO_TCP nest header is not included
 * here -- confirm the caller accounts for it.
 */
#define TCP_NLATTR_SIZE	( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1209 
/* Import private TCP state from a CTA_PROTOINFO_TCP netlink attribute
 * into @ct (ctnetlink create/update path).
 *
 * Returns 0 on success (including the no-attribute no-op case) or a
 * negative errno on malformed input.  All state mutation happens
 * under ct->lock.
 */
static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
{
	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
	int err;

	/* updates could not contain anything about the private
	 * protocol info, in that case skip the parsing */
	if (!pattr)
		return 0;

	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
			       tcp_nla_policy, NULL);
	if (err < 0)
		return err;

	/* Validate the state value before taking the lock. */
	if (tb[CTA_PROTOINFO_TCP_STATE] &&
	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
		return -EINVAL;

	spin_lock_bh(&ct->lock);
	if (tb[CTA_PROTOINFO_TCP_STATE])
		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);

	/* Flags come as (flags, mask) pairs: only bits set in the mask
	 * are updated, the rest keep their current value. */
	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
		struct nf_ct_tcp_flags *attr =
			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
		ct->proto.tcp.seen[0].flags &= ~attr->mask;
		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
	}

	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
		struct nf_ct_tcp_flags *attr =
			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
		ct->proto.tcp.seen[1].flags &= ~attr->mask;
		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
	}

	/* Window scale factors are only applied when both directions
	 * are supplied and both directions negotiated window scaling. */
	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
		ct->proto.tcp.seen[0].td_scale =
			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
		ct->proto.tcp.seen[1].td_scale =
			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
	}
	spin_unlock_bh(&ct->lock);

	return 0;
}
1261 
1262 static unsigned int tcp_nlattr_tuple_size(void)
1263 {
1264 	static unsigned int size __read_mostly;
1265 
1266 	if (!size)
1267 		size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1268 
1269 	return size;
1270 }
1271 #endif
1272 
1273 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1274 
1275 #include <linux/netfilter/nfnetlink.h>
1276 #include <linux/netfilter/nfnetlink_cttimeout.h>
1277 
1278 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1279 				     struct net *net, void *data)
1280 {
1281 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
1282 	unsigned int *timeouts = data;
1283 	int i;
1284 
1285 	if (!timeouts)
1286 		timeouts = tn->timeouts;
1287 	/* set default TCP timeouts. */
1288 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1289 		timeouts[i] = tn->timeouts[i];
1290 
1291 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1292 		timeouts[TCP_CONNTRACK_SYN_SENT] =
1293 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1294 	}
1295 
1296 	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1297 		timeouts[TCP_CONNTRACK_SYN_RECV] =
1298 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1299 	}
1300 	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1301 		timeouts[TCP_CONNTRACK_ESTABLISHED] =
1302 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1303 	}
1304 	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1305 		timeouts[TCP_CONNTRACK_FIN_WAIT] =
1306 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1307 	}
1308 	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1309 		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1310 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1311 	}
1312 	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1313 		timeouts[TCP_CONNTRACK_LAST_ACK] =
1314 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1315 	}
1316 	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1317 		timeouts[TCP_CONNTRACK_TIME_WAIT] =
1318 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1319 	}
1320 	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1321 		timeouts[TCP_CONNTRACK_CLOSE] =
1322 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1323 	}
1324 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1325 		timeouts[TCP_CONNTRACK_SYN_SENT2] =
1326 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1327 	}
1328 	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1329 		timeouts[TCP_CONNTRACK_RETRANS] =
1330 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1331 	}
1332 	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1333 		timeouts[TCP_CONNTRACK_UNACK] =
1334 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1335 	}
1336 
1337 	timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
1338 	return 0;
1339 }
1340 
1341 static int
1342 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1343 {
1344 	const unsigned int *timeouts = data;
1345 
1346 	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1347 			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1348 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1349 			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1350 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1351 			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1352 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1353 			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1354 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1355 			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1356 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1357 			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1358 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1359 			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1360 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1361 			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1362 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1363 			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1364 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1365 			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1366 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1367 			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1368 		goto nla_put_failure;
1369 	return 0;
1370 
1371 nla_put_failure:
1372 	return -ENOSPC;
1373 }
1374 
/* Validation policy for CTA_TIMEOUT_TCP_* attributes: every timeout
 * is a 32-bit value (seconds on the wire). */
static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
};
1388 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1389 
#ifdef CONFIG_SYSCTL
/* Template sysctl table; .data pointers are left NULL here and filled
 * in per-netns by tcp_kmemdup_sysctl_table() using POSITIONAL indices
 * (ctl_table[0..12]).  Do not reorder, add or remove entries without
 * updating the index assignments there.
 */
static struct ctl_table tcp_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_established",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_close_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_last_ack",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_time_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_close",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* The three tunables below are plain integers, not jiffies. */
	{
		.procname	= "nf_conntrack_tcp_loose",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname       = "nf_conntrack_tcp_be_liberal",
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
	{
		.procname	= "nf_conntrack_tcp_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* sentinel */
};
#endif /* CONFIG_SYSCTL */
1472 #endif /* CONFIG_SYSCTL */
1473 
/* Duplicate the sysctl template for one netns and point each entry's
 * .data at that netns' storage.  The numeric indices below must match
 * the entry order of tcp_sysctl_table[] exactly.
 *
 * Returns 0 on success (also when sysctls are compiled out or the
 * table already exists), -ENOMEM on allocation failure.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	/* Already duplicated for this netns; nothing to do. */
	if (pn->ctl_table)
		return 0;

	pn->ctl_table = kmemdup(tcp_sysctl_table,
				sizeof(tcp_sysctl_table),
				GFP_KERNEL);
	if (!pn->ctl_table)
		return -ENOMEM;

	pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
	pn->ctl_table[10].data = &tn->tcp_loose;
	pn->ctl_table[11].data = &tn->tcp_be_liberal;
	pn->ctl_table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1503 
1504 static int tcp_init_net(struct net *net)
1505 {
1506 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
1507 	struct nf_proto_net *pn = &tn->pn;
1508 
1509 	if (!pn->users) {
1510 		int i;
1511 
1512 		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1513 			tn->timeouts[i] = tcp_timeouts[i];
1514 
1515 		/* timeouts[0] is unused, make it same as SYN_SENT so
1516 		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
1517 		 */
1518 		tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
1519 		tn->tcp_loose = nf_ct_tcp_loose;
1520 		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1521 		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1522 	}
1523 
1524 	return tcp_kmemdup_sysctl_table(pn, tn);
1525 }
1526 
/* Accessor for the TCP tracker's per-netns base protocol state. */
static struct nf_proto_net *tcp_get_net_proto(struct net *net)
{
	return &net->ct.nf_ct_proto.tcp.pn;
}
1531 
/* L4 protocol tracker definition for TCP, wiring the handlers above
 * into the conntrack core (packet tracking, early-drop policy,
 * ctnetlink import/export and cttimeout support). */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
{
	.l4proto 		= IPPROTO_TCP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	.print_conntrack 	= tcp_print_conntrack,
#endif
	.packet 		= tcp_packet,
	.can_early_drop		= tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	.to_nlattr		= tcp_to_nlattr,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nlattr_size		= TCP_NLATTR_SIZE,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
	.init_net		= tcp_init_net,
	.get_net_proto		= tcp_get_net_proto,
};
1562