1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21 
22 #include <net/tcp.h>
23 
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_log.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35 
36 /* "Be conservative in what you do,
37     be liberal in what you accept from others."
38     If it's non-zero, we mark only out of window RST segments as INVALID. */
39 static int nf_ct_tcp_be_liberal __read_mostly = 0;
40 
41 /* If it is set to zero, we disable picking up already established
42    connections. */
43 static int nf_ct_tcp_loose __read_mostly = 1;
44 
45 /* Max number of the retransmitted packets without receiving an (acceptable)
46    ACK from the destination. If this number is reached, a shorter timer
47    will be started. */
48 static int nf_ct_tcp_max_retrans __read_mostly = 3;
49 
50   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
51      closely.  They're more complex. --RR */
52 
53 static const char *const tcp_conntrack_names[] = {
54 	"NONE",
55 	"SYN_SENT",
56 	"SYN_RECV",
57 	"ESTABLISHED",
58 	"FIN_WAIT",
59 	"CLOSE_WAIT",
60 	"LAST_ACK",
61 	"TIME_WAIT",
62 	"CLOSE",
63 	"SYN_SENT2",
64 };
65 
66 #define SECS * HZ
67 #define MINS * 60 SECS
68 #define HOURS * 60 MINS
69 #define DAYS * 24 HOURS
70 
71 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
72 	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
73 	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
74 	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
75 	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
76 	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
77 	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
78 	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
79 	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
80 	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
81 /* RFC1122 says the R2 limit should be at least 100 seconds.
82    Linux uses 15 packets as limit, which corresponds
83    to ~13-30min depending on RTO. */
84 	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
85 	[TCP_CONNTRACK_UNACK]		= 5 MINS,
86 };
87 
/*
 * Two-letter shorthands for the conntrack states, so that one row of the
 * state transition table fits on a single line.
 */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sS2	TCP_CONNTRACK_SYN_SENT2
#define sIV	TCP_CONNTRACK_MAX	/* table entry: packet is INVALID */
#define sIG	TCP_CONNTRACK_IGNORE	/* table entry: packet is IGNORED */
100 
/*
 * Classification of a segment by its RST/SYN/FIN/ACK flags.
 * The value is used as the middle index of the state transition table.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
110 
111 /*
112  * The TCP state transition table needs a few words...
113  *
114  * We are the man in the middle. All the packets go through us
115  * but might get lost in transit to the destination.
116  * It is assumed that the destinations can't receive segments
117  * we haven't seen.
118  *
119  * The checked segment is in window, but our windows are *not*
120  * equivalent with the ones of the sender/receiver. We always
121  * try to guess the state of the current sender.
122  *
 * The meanings of the states are:
124  *
125  * NONE:	initial state
126  * SYN_SENT:	SYN-only packet seen
127  * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
128  * SYN_RECV:	SYN-ACK packet seen
129  * ESTABLISHED:	ACK packet seen
130  * FIN_WAIT:	FIN packet seen
131  * CLOSE_WAIT:	ACK seen (after FIN)
132  * LAST_ACK:	FIN seen (after FIN)
133  * TIME_WAIT:	last ACK seen
134  * CLOSE:	closed connection (RST)
135  *
136  * Packets marked as IGNORED (sIG):
137  *	if they may be either invalid or valid
138  *	and the receiver may send back a connection
139  *	closing RST or a SYN/ACK.
140  *
141  * Packets marked as INVALID (sIV):
142  *	if we regard them as truly invalid packets
143  */
144 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
145 	{
146 /* ORIGINAL */
147 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
148 /*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
149 /*
150  *	sNO -> sSS	Initialize a new connection
151  *	sSS -> sSS	Retransmitted SYN
152  *	sS2 -> sS2	Late retransmitted SYN
153  *	sSR -> sIG
154  *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
155  *			are errors. Receiver will reply with RST
156  *			and close the connection.
157  *			Or we are not in sync and hold a dead connection.
158  *	sFW -> sIG
159  *	sCW -> sIG
160  *	sLA -> sIG
161  *	sTW -> sSS	Reopened connection (RFC 1122).
162  *	sCL -> sSS
163  */
164 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
165 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
166 /*
167  *	sNO -> sIV	Too late and no reason to do anything
168  *	sSS -> sIV	Client can't send SYN and then SYN/ACK
169  *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
170  *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
171  *	sES -> sIV	Invalid SYN/ACK packets sent by the client
172  *	sFW -> sIV
173  *	sCW -> sIV
174  *	sLA -> sIV
175  *	sTW -> sIV
176  *	sCL -> sIV
177  */
178 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
179 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
180 /*
181  *	sNO -> sIV	Too late and no reason to do anything...
182  *	sSS -> sIV	Client migth not send FIN in this state:
183  *			we enforce waiting for a SYN/ACK reply first.
184  *	sS2 -> sIV
185  *	sSR -> sFW	Close started.
186  *	sES -> sFW
187  *	sFW -> sLA	FIN seen in both directions, waiting for
188  *			the last ACK.
189  *			Migth be a retransmitted FIN as well...
190  *	sCW -> sLA
191  *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
192  *	sTW -> sTW
193  *	sCL -> sCL
194  */
195 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
196 /*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
197 /*
198  *	sNO -> sES	Assumed.
199  *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
200  *	sS2 -> sIV
201  *	sSR -> sES	Established state is reached.
202  *	sES -> sES	:-)
203  *	sFW -> sCW	Normal close request answered by ACK.
204  *	sCW -> sCW
205  *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
206  *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
207  *	sCL -> sCL
208  */
209 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
210 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
211 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
212 	},
213 	{
214 /* REPLY */
215 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
216 /*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
217 /*
218  *	sNO -> sIV	Never reached.
219  *	sSS -> sS2	Simultaneous open
220  *	sS2 -> sS2	Retransmitted simultaneous SYN
221  *	sSR -> sIV	Invalid SYN packets sent by the server
222  *	sES -> sIV
223  *	sFW -> sIV
224  *	sCW -> sIV
225  *	sLA -> sIV
226  *	sTW -> sSS	Reopened connection, but server may have switched role
227  *	sCL -> sIV
228  */
229 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
230 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231 /*
232  *	sSS -> sSR	Standard open.
233  *	sS2 -> sSR	Simultaneous open
234  *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
235  *	sES -> sIG	Late retransmitted SYN/ACK?
236  *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
237  *	sCW -> sIG
238  *	sLA -> sIG
239  *	sTW -> sIG
240  *	sCL -> sIG
241  */
242 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
243 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
244 /*
245  *	sSS -> sIV	Server might not send FIN in this state.
246  *	sS2 -> sIV
247  *	sSR -> sFW	Close started.
248  *	sES -> sFW
249  *	sFW -> sLA	FIN seen in both directions.
250  *	sCW -> sLA
251  *	sLA -> sLA	Retransmitted FIN.
252  *	sTW -> sTW
253  *	sCL -> sCL
254  */
255 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
256 /*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
257 /*
258  *	sSS -> sIG	Might be a half-open connection.
259  *	sS2 -> sIG
260  *	sSR -> sSR	Might answer late resent SYN.
261  *	sES -> sES	:-)
262  *	sFW -> sCW	Normal close request answered by ACK.
263  *	sCW -> sCW
264  *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
265  *	sTW -> sTW	Retransmitted last ACK.
266  *	sCL -> sCL
267  */
268 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
269 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
270 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
271 	}
272 };
273 
274 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
275 {
276 	return &net->ct.nf_ct_proto.tcp;
277 }
278 
279 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280 			     struct net *net, struct nf_conntrack_tuple *tuple)
281 {
282 	const struct tcphdr *hp;
283 	struct tcphdr _hdr;
284 
285 	/* Actually only need first 8 bytes. */
286 	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
287 	if (hp == NULL)
288 		return false;
289 
290 	tuple->src.u.tcp.port = hp->source;
291 	tuple->dst.u.tcp.port = hp->dest;
292 
293 	return true;
294 }
295 
296 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
297 			     const struct nf_conntrack_tuple *orig)
298 {
299 	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
300 	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
301 	return true;
302 }
303 
304 /* Print out the per-protocol part of the tuple. */
305 static void tcp_print_tuple(struct seq_file *s,
306 			    const struct nf_conntrack_tuple *tuple)
307 {
308 	seq_printf(s, "sport=%hu dport=%hu ",
309 		   ntohs(tuple->src.u.tcp.port),
310 		   ntohs(tuple->dst.u.tcp.port));
311 }
312 
313 /* Print out the private part of the conntrack. */
314 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
315 {
316 	enum tcp_conntrack state;
317 
318 	spin_lock_bh(&ct->lock);
319 	state = ct->proto.tcp.state;
320 	spin_unlock_bh(&ct->lock);
321 
322 	seq_printf(s, "%s ", tcp_conntrack_names[state]);
323 }
324 
325 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326 {
327 	if (tcph->rst) return TCP_RST_SET;
328 	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329 	else if (tcph->fin) return TCP_FIN_SET;
330 	else if (tcph->ack) return TCP_ACK_SET;
331 	else return TCP_NONE_SET;
332 }
333 
334 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335    in IP Filter' by Guido van Rooij.
336 
337    http://www.sane.nl/events/sane2000/papers.html
338    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
339 
340    The boundaries and the conditions are changed according to RFC793:
341    the packet must intersect the window (i.e. segments may be
342    after the right or before the left edge) and thus receivers may ACK
343    segments after the right edge of the window.
344 
345 	td_maxend = max(sack + max(win,1)) seen in reply packets
346 	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347 	td_maxwin += seq + len - sender.td_maxend
348 			if seq + len > sender.td_maxend
349 	td_end    = max(seq + len) seen in sent packets
350 
351    I.   Upper bound for valid data:	seq <= sender.td_maxend
352    II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
353    III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
354    IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
355 
356    where sack is the highest right edge of sack block found in the packet
357    or ack in the case of packet without SACK option.
358 
   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
361 */
362 
363 static inline __u32 segment_seq_plus_len(__u32 seq,
364 					 size_t len,
365 					 unsigned int dataoff,
366 					 const struct tcphdr *tcph)
367 {
368 	/* XXX Should I use payload length field in IP/IPv6 header ?
369 	 * - YK */
370 	return (seq + len - dataoff - tcph->doff*4
371 		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
372 }
373 
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/* The larger of the sender's maximum window and MAXACKWINCONST. */
#define MAXACKWINDOW(sender)						\
	(MAXACKWINCONST >= (sender)->td_maxwin ? MAXACKWINCONST	\
					       : (sender)->td_maxwin)
379 
380 /*
381  * Simplified tcp_parse_options routine from tcp_input.c
382  */
383 static void tcp_options(const struct sk_buff *skb,
384 			unsigned int dataoff,
385 			const struct tcphdr *tcph,
386 			struct ip_ct_tcp_state *state)
387 {
388 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
389 	const unsigned char *ptr;
390 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
391 
392 	if (!length)
393 		return;
394 
395 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
396 				 length, buff);
397 	BUG_ON(ptr == NULL);
398 
399 	state->td_scale =
400 	state->flags = 0;
401 
402 	while (length > 0) {
403 		int opcode=*ptr++;
404 		int opsize;
405 
406 		switch (opcode) {
407 		case TCPOPT_EOL:
408 			return;
409 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
410 			length--;
411 			continue;
412 		default:
413 			opsize=*ptr++;
414 			if (opsize < 2) /* "silly options" */
415 				return;
416 			if (opsize > length)
417 				return;	/* don't parse partial options */
418 
419 			if (opcode == TCPOPT_SACK_PERM
420 			    && opsize == TCPOLEN_SACK_PERM)
421 				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
422 			else if (opcode == TCPOPT_WINDOW
423 				 && opsize == TCPOLEN_WINDOW) {
424 				state->td_scale = *(u_int8_t *)ptr;
425 
426 				if (state->td_scale > 14) {
427 					/* See RFC1323 */
428 					state->td_scale = 14;
429 				}
430 				state->flags |=
431 					IP_CT_TCP_FLAG_WINDOW_SCALE;
432 			}
433 			ptr += opsize - 2;
434 			length -= opsize;
435 		}
436 	}
437 }
438 
439 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
440                      const struct tcphdr *tcph, __u32 *sack)
441 {
442 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
443 	const unsigned char *ptr;
444 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
445 	__u32 tmp;
446 
447 	if (!length)
448 		return;
449 
450 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
451 				 length, buff);
452 	BUG_ON(ptr == NULL);
453 
454 	/* Fast path for timestamp-only option */
455 	if (length == TCPOLEN_TSTAMP_ALIGNED
456 	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
457 				       | (TCPOPT_NOP << 16)
458 				       | (TCPOPT_TIMESTAMP << 8)
459 				       | TCPOLEN_TIMESTAMP))
460 		return;
461 
462 	while (length > 0) {
463 		int opcode = *ptr++;
464 		int opsize, i;
465 
466 		switch (opcode) {
467 		case TCPOPT_EOL:
468 			return;
469 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
470 			length--;
471 			continue;
472 		default:
473 			opsize = *ptr++;
474 			if (opsize < 2) /* "silly options" */
475 				return;
476 			if (opsize > length)
477 				return;	/* don't parse partial options */
478 
479 			if (opcode == TCPOPT_SACK
480 			    && opsize >= (TCPOLEN_SACK_BASE
481 					  + TCPOLEN_SACK_PERBLOCK)
482 			    && !((opsize - TCPOLEN_SACK_BASE)
483 				 % TCPOLEN_SACK_PERBLOCK)) {
484 				for (i = 0;
485 				     i < (opsize - TCPOLEN_SACK_BASE);
486 				     i += TCPOLEN_SACK_PERBLOCK) {
487 					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
488 
489 					if (after(tmp, *sack))
490 						*sack = tmp;
491 				}
492 				return;
493 			}
494 			ptr += opsize - 2;
495 			length -= opsize;
496 		}
497 	}
498 }
499 
500 static bool tcp_in_window(const struct nf_conn *ct,
501 			  struct ip_ct_tcp *state,
502 			  enum ip_conntrack_dir dir,
503 			  unsigned int index,
504 			  const struct sk_buff *skb,
505 			  unsigned int dataoff,
506 			  const struct tcphdr *tcph,
507 			  u_int8_t pf)
508 {
509 	struct net *net = nf_ct_net(ct);
510 	struct nf_tcp_net *tn = tcp_pernet(net);
511 	struct ip_ct_tcp_state *sender = &state->seen[dir];
512 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
513 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
514 	__u32 seq, ack, sack, end, win, swin;
515 	s32 receiver_offset;
516 	bool res, in_recv_win;
517 
518 	/*
519 	 * Get the required data from the packet.
520 	 */
521 	seq = ntohl(tcph->seq);
522 	ack = sack = ntohl(tcph->ack_seq);
523 	win = ntohs(tcph->window);
524 	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
525 
526 	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
527 		tcp_sack(skb, dataoff, tcph, &sack);
528 
529 	/* Take into account NAT sequence number mangling */
530 	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
531 	ack -= receiver_offset;
532 	sack -= receiver_offset;
533 
534 	pr_debug("tcp_in_window: START\n");
535 	pr_debug("tcp_in_window: ");
536 	nf_ct_dump_tuple(tuple);
537 	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
538 		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
539 	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
540 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
541 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
542 		 sender->td_scale,
543 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
544 		 receiver->td_scale);
545 
546 	if (sender->td_maxwin == 0) {
547 		/*
548 		 * Initialize sender data.
549 		 */
550 		if (tcph->syn) {
551 			/*
552 			 * SYN-ACK in reply to a SYN
553 			 * or SYN from reply direction in simultaneous open.
554 			 */
555 			sender->td_end =
556 			sender->td_maxend = end;
557 			sender->td_maxwin = (win == 0 ? 1 : win);
558 
559 			tcp_options(skb, dataoff, tcph, sender);
560 			/*
561 			 * RFC 1323:
562 			 * Both sides must send the Window Scale option
563 			 * to enable window scaling in either direction.
564 			 */
565 			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
566 			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
567 				sender->td_scale =
568 				receiver->td_scale = 0;
569 			if (!tcph->ack)
570 				/* Simultaneous open */
571 				return true;
572 		} else {
573 			/*
574 			 * We are in the middle of a connection,
575 			 * its history is lost for us.
576 			 * Let's try to use the data from the packet.
577 			 */
578 			sender->td_end = end;
579 			swin = win << sender->td_scale;
580 			sender->td_maxwin = (swin == 0 ? 1 : swin);
581 			sender->td_maxend = end + sender->td_maxwin;
582 			/*
583 			 * We haven't seen traffic in the other direction yet
584 			 * but we have to tweak window tracking to pass III
585 			 * and IV until that happens.
586 			 */
587 			if (receiver->td_maxwin == 0)
588 				receiver->td_end = receiver->td_maxend = sack;
589 		}
590 	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
591 		     && dir == IP_CT_DIR_ORIGINAL)
592 		   || (state->state == TCP_CONNTRACK_SYN_RECV
593 		     && dir == IP_CT_DIR_REPLY))
594 		   && after(end, sender->td_end)) {
595 		/*
596 		 * RFC 793: "if a TCP is reinitialized ... then it need
597 		 * not wait at all; it must only be sure to use sequence
598 		 * numbers larger than those recently used."
599 		 */
600 		sender->td_end =
601 		sender->td_maxend = end;
602 		sender->td_maxwin = (win == 0 ? 1 : win);
603 
604 		tcp_options(skb, dataoff, tcph, sender);
605 	}
606 
607 	if (!(tcph->ack)) {
608 		/*
609 		 * If there is no ACK, just pretend it was set and OK.
610 		 */
611 		ack = sack = receiver->td_end;
612 	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
613 		    (TCP_FLAG_ACK|TCP_FLAG_RST))
614 		   && (ack == 0)) {
615 		/*
616 		 * Broken TCP stacks, that set ACK in RST packets as well
617 		 * with zero ack value.
618 		 */
619 		ack = sack = receiver->td_end;
620 	}
621 
622 	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
623 		/*
624 		 * RST sent answering SYN.
625 		 */
626 		seq = end = sender->td_end;
627 
628 	pr_debug("tcp_in_window: ");
629 	nf_ct_dump_tuple(tuple);
630 	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
631 		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
632 	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
633 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
634 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
635 		 sender->td_scale,
636 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
637 		 receiver->td_scale);
638 
639 	/* Is the ending sequence in the receive window (if available)? */
640 	in_recv_win = !receiver->td_maxwin ||
641 		      after(end, sender->td_end - receiver->td_maxwin - 1);
642 
643 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
644 		 before(seq, sender->td_maxend + 1),
645 		 (in_recv_win ? 1 : 0),
646 		 before(sack, receiver->td_end + 1),
647 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
648 
649 	if (before(seq, sender->td_maxend + 1) &&
650 	    in_recv_win &&
651 	    before(sack, receiver->td_end + 1) &&
652 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
653 		/*
654 		 * Take into account window scaling (RFC 1323).
655 		 */
656 		if (!tcph->syn)
657 			win <<= sender->td_scale;
658 
659 		/*
660 		 * Update sender data.
661 		 */
662 		swin = win + (sack - ack);
663 		if (sender->td_maxwin < swin)
664 			sender->td_maxwin = swin;
665 		if (after(end, sender->td_end)) {
666 			sender->td_end = end;
667 			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
668 		}
669 		if (tcph->ack) {
670 			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
671 				sender->td_maxack = ack;
672 				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
673 			} else if (after(ack, sender->td_maxack))
674 				sender->td_maxack = ack;
675 		}
676 
677 		/*
678 		 * Update receiver data.
679 		 */
680 		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
681 			receiver->td_maxwin += end - sender->td_maxend;
682 		if (after(sack + win, receiver->td_maxend - 1)) {
683 			receiver->td_maxend = sack + win;
684 			if (win == 0)
685 				receiver->td_maxend++;
686 		}
687 		if (ack == receiver->td_end)
688 			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
689 
690 		/*
691 		 * Check retransmissions.
692 		 */
693 		if (index == TCP_ACK_SET) {
694 			if (state->last_dir == dir
695 			    && state->last_seq == seq
696 			    && state->last_ack == ack
697 			    && state->last_end == end
698 			    && state->last_win == win)
699 				state->retrans++;
700 			else {
701 				state->last_dir = dir;
702 				state->last_seq = seq;
703 				state->last_ack = ack;
704 				state->last_end = end;
705 				state->last_win = win;
706 				state->retrans = 0;
707 			}
708 		}
709 		res = true;
710 	} else {
711 		res = false;
712 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
713 		    tn->tcp_be_liberal)
714 			res = true;
715 		if (!res && LOG_INVALID(net, IPPROTO_TCP))
716 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
717 			"nf_ct_tcp: %s ",
718 			before(seq, sender->td_maxend + 1) ?
719 			in_recv_win ?
720 			before(sack, receiver->td_end + 1) ?
721 			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
722 			: "ACK is under the lower bound (possible overly delayed ACK)"
723 			: "ACK is over the upper bound (ACKed data not seen yet)"
724 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
725 			: "SEQ is over the upper bound (over the window of the receiver)");
726 	}
727 
728 	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
729 		 "receiver end=%u maxend=%u maxwin=%u\n",
730 		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
731 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
732 
733 	return res;
734 }
735 
736 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
737 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
738 				 TCPHDR_URG) + 1] =
739 {
740 	[TCPHDR_SYN]				= 1,
741 	[TCPHDR_SYN|TCPHDR_URG]			= 1,
742 	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
743 	[TCPHDR_RST]				= 1,
744 	[TCPHDR_RST|TCPHDR_ACK]			= 1,
745 	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
746 	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
747 	[TCPHDR_ACK]				= 1,
748 	[TCPHDR_ACK|TCPHDR_URG]			= 1,
749 };
750 
751 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
752 static int tcp_error(struct net *net, struct nf_conn *tmpl,
753 		     struct sk_buff *skb,
754 		     unsigned int dataoff,
755 		     enum ip_conntrack_info *ctinfo,
756 		     u_int8_t pf,
757 		     unsigned int hooknum)
758 {
759 	const struct tcphdr *th;
760 	struct tcphdr _tcph;
761 	unsigned int tcplen = skb->len - dataoff;
762 	u_int8_t tcpflags;
763 
764 	/* Smaller that minimal TCP header? */
765 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
766 	if (th == NULL) {
767 		if (LOG_INVALID(net, IPPROTO_TCP))
768 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
769 				"nf_ct_tcp: short packet ");
770 		return -NF_ACCEPT;
771 	}
772 
773 	/* Not whole TCP header or malformed packet */
774 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
775 		if (LOG_INVALID(net, IPPROTO_TCP))
776 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
777 				"nf_ct_tcp: truncated/malformed packet ");
778 		return -NF_ACCEPT;
779 	}
780 
781 	/* Checksum invalid? Ignore.
782 	 * We skip checking packets on the outgoing path
783 	 * because the checksum is assumed to be correct.
784 	 */
785 	/* FIXME: Source route IP option packets --RR */
786 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
787 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
788 		if (LOG_INVALID(net, IPPROTO_TCP))
789 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
790 				  "nf_ct_tcp: bad TCP checksum ");
791 		return -NF_ACCEPT;
792 	}
793 
794 	/* Check TCP flags. */
795 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
796 	if (!tcp_valid_flags[tcpflags]) {
797 		if (LOG_INVALID(net, IPPROTO_TCP))
798 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
799 				  "nf_ct_tcp: invalid TCP flag combination ");
800 		return -NF_ACCEPT;
801 	}
802 
803 	return NF_ACCEPT;
804 }
805 
806 static unsigned int *tcp_get_timeouts(struct net *net)
807 {
808 	return tcp_pernet(net)->timeouts;
809 }
810 
811 /* Returns verdict for packet, or -1 for invalid. */
812 static int tcp_packet(struct nf_conn *ct,
813 		      const struct sk_buff *skb,
814 		      unsigned int dataoff,
815 		      enum ip_conntrack_info ctinfo,
816 		      u_int8_t pf,
817 		      unsigned int hooknum,
818 		      unsigned int *timeouts)
819 {
820 	struct net *net = nf_ct_net(ct);
821 	struct nf_tcp_net *tn = tcp_pernet(net);
822 	struct nf_conntrack_tuple *tuple;
823 	enum tcp_conntrack new_state, old_state;
824 	enum ip_conntrack_dir dir;
825 	const struct tcphdr *th;
826 	struct tcphdr _tcph;
827 	unsigned long timeout;
828 	unsigned int index;
829 
830 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
831 	BUG_ON(th == NULL);
832 
833 	spin_lock_bh(&ct->lock);
834 	old_state = ct->proto.tcp.state;
835 	dir = CTINFO2DIR(ctinfo);
836 	index = get_conntrack_index(th);
837 	new_state = tcp_conntracks[dir][index][old_state];
838 	tuple = &ct->tuplehash[dir].tuple;
839 
840 	switch (new_state) {
841 	case TCP_CONNTRACK_SYN_SENT:
842 		if (old_state < TCP_CONNTRACK_TIME_WAIT)
843 			break;
844 		/* RFC 1122: "When a connection is closed actively,
845 		 * it MUST linger in TIME-WAIT state for a time 2xMSL
846 		 * (Maximum Segment Lifetime). However, it MAY accept
847 		 * a new SYN from the remote TCP to reopen the connection
848 		 * directly from TIME-WAIT state, if..."
849 		 * We ignore the conditions because we are in the
850 		 * TIME-WAIT state anyway.
851 		 *
852 		 * Handle aborted connections: we and the server
853 		 * think there is an existing connection but the client
854 		 * aborts it and starts a new one.
855 		 */
856 		if (((ct->proto.tcp.seen[dir].flags
857 		      | ct->proto.tcp.seen[!dir].flags)
858 		     & IP_CT_TCP_FLAG_CLOSE_INIT)
859 		    || (ct->proto.tcp.last_dir == dir
860 		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
861 			/* Attempt to reopen a closed/aborted connection.
862 			 * Delete this connection and look up again. */
863 			spin_unlock_bh(&ct->lock);
864 
865 			/* Only repeat if we can actually remove the timer.
866 			 * Destruction may already be in progress in process
867 			 * context and we must give it a chance to terminate.
868 			 */
869 			if (nf_ct_kill(ct))
870 				return -NF_REPEAT;
871 			return NF_DROP;
872 		}
873 		/* Fall through */
874 	case TCP_CONNTRACK_IGNORE:
875 		/* Ignored packets:
876 		 *
877 		 * Our connection entry may be out of sync, so ignore
878 		 * packets which may signal the real connection between
879 		 * the client and the server.
880 		 *
881 		 * a) SYN in ORIGINAL
882 		 * b) SYN/ACK in REPLY
883 		 * c) ACK in reply direction after initial SYN in original.
884 		 *
885 		 * If the ignored packet is invalid, the receiver will send
886 		 * a RST we'll catch below.
887 		 */
888 		if (index == TCP_SYNACK_SET
889 		    && ct->proto.tcp.last_index == TCP_SYN_SET
890 		    && ct->proto.tcp.last_dir != dir
891 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
892 			/* b) This SYN/ACK acknowledges a SYN that we earlier
893 			 * ignored as invalid. This means that the client and
894 			 * the server are both in sync, while the firewall is
895 			 * not. We get in sync from the previously annotated
896 			 * values.
897 			 */
898 			old_state = TCP_CONNTRACK_SYN_SENT;
899 			new_state = TCP_CONNTRACK_SYN_RECV;
900 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
901 				ct->proto.tcp.last_end;
902 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
903 				ct->proto.tcp.last_end;
904 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
905 				ct->proto.tcp.last_win == 0 ?
906 					1 : ct->proto.tcp.last_win;
907 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
908 				ct->proto.tcp.last_wscale;
909 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
910 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
911 				ct->proto.tcp.last_flags;
912 			memset(&ct->proto.tcp.seen[dir], 0,
913 			       sizeof(struct ip_ct_tcp_state));
914 			break;
915 		}
916 		ct->proto.tcp.last_index = index;
917 		ct->proto.tcp.last_dir = dir;
918 		ct->proto.tcp.last_seq = ntohl(th->seq);
919 		ct->proto.tcp.last_end =
920 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
921 		ct->proto.tcp.last_win = ntohs(th->window);
922 
923 		/* a) This is a SYN in ORIGINAL. The client and the server
924 		 * may be in sync but we are not. In that case, we annotate
925 		 * the TCP options and let the packet go through. If it is a
926 		 * valid SYN packet, the server will reply with a SYN/ACK, and
927 		 * then we'll get in sync. Otherwise, the server potentially
928 		 * responds with a challenge ACK if implementing RFC5961.
929 		 */
930 		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
931 			struct ip_ct_tcp_state seen = {};
932 
933 			ct->proto.tcp.last_flags =
934 			ct->proto.tcp.last_wscale = 0;
935 			tcp_options(skb, dataoff, th, &seen);
936 			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
937 				ct->proto.tcp.last_flags |=
938 					IP_CT_TCP_FLAG_WINDOW_SCALE;
939 				ct->proto.tcp.last_wscale = seen.td_scale;
940 			}
941 			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
942 				ct->proto.tcp.last_flags |=
943 					IP_CT_TCP_FLAG_SACK_PERM;
944 			}
			/* Mark the potential for an RFC5961 challenge ACK.
			 * This poses a special problem for the LAST_ACK state,
			 * as the ACK is interpreted as ACKing the last FIN.
			 */
949 			if (old_state == TCP_CONNTRACK_LAST_ACK)
950 				ct->proto.tcp.last_flags |=
951 					IP_CT_EXP_CHALLENGE_ACK;
952 		}
953 		spin_unlock_bh(&ct->lock);
954 		if (LOG_INVALID(net, IPPROTO_TCP))
955 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
956 				  "nf_ct_tcp: invalid packet ignored in "
957 				  "state %s ", tcp_conntrack_names[old_state]);
958 		return NF_ACCEPT;
959 	case TCP_CONNTRACK_MAX:
960 		/* Special case for SYN proxy: when the SYN to the server or
961 		 * the SYN/ACK from the server is lost, the client may transmit
962 		 * a keep-alive packet while in SYN_SENT state. This needs to
963 		 * be associated with the original conntrack entry in order to
964 		 * generate a new SYN with the correct sequence number.
965 		 */
966 		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
967 		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
968 		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
969 		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
970 			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
971 			spin_unlock_bh(&ct->lock);
972 			return NF_ACCEPT;
973 		}
974 
975 		/* Invalid packet */
976 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
977 			 dir, get_conntrack_index(th), old_state);
978 		spin_unlock_bh(&ct->lock);
979 		if (LOG_INVALID(net, IPPROTO_TCP))
980 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
981 				  "nf_ct_tcp: invalid state ");
982 		return -NF_ACCEPT;
983 	case TCP_CONNTRACK_TIME_WAIT:
		/* RFC5961 compliance causes the stack to send a
		 * "challenge-ACK", e.g. in response to spurious SYNs.
		 * Conntrack MUST NOT believe this ACK is acking the last FIN.
		 */
988 		if (old_state == TCP_CONNTRACK_LAST_ACK &&
989 		    index == TCP_ACK_SET &&
990 		    ct->proto.tcp.last_dir != dir &&
991 		    ct->proto.tcp.last_index == TCP_SYN_SET &&
992 		    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
993 			/* Detected RFC5961 challenge ACK */
994 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
995 			spin_unlock_bh(&ct->lock);
996 			if (LOG_INVALID(net, IPPROTO_TCP))
997 				nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
998 				      "nf_ct_tcp: challenge-ACK ignored ");
999 			return NF_ACCEPT; /* Don't change state */
1000 		}
1001 		break;
1002 	case TCP_CONNTRACK_CLOSE:
1003 		if (index == TCP_RST_SET
1004 		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
1005 		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
1006 			/* Invalid RST  */
1007 			spin_unlock_bh(&ct->lock);
1008 			if (LOG_INVALID(net, IPPROTO_TCP))
1009 				nf_log_packet(net, pf, 0, skb, NULL, NULL,
1010 					      NULL, "nf_ct_tcp: invalid RST ");
1011 			return -NF_ACCEPT;
1012 		}
1013 		if (index == TCP_RST_SET
1014 		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1015 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
1016 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
1017 			    && ct->proto.tcp.last_index == TCP_ACK_SET))
1018 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1019 			/* RST sent to invalid SYN or ACK we had let through
1020 			 * at a) and c) above:
1021 			 *
1022 			 * a) SYN was in window then
1023 			 * c) we hold a half-open connection.
1024 			 *
1025 			 * Delete our connection entry.
1026 			 * We skip window checking, because packet might ACK
1027 			 * segments we ignored. */
1028 			goto in_window;
1029 		}
1030 		/* Just fall through */
1031 	default:
1032 		/* Keep compilers happy. */
1033 		break;
1034 	}
1035 
1036 	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1037 			   skb, dataoff, th, pf)) {
1038 		spin_unlock_bh(&ct->lock);
1039 		return -NF_ACCEPT;
1040 	}
1041      in_window:
1042 	/* From now on we have got in-window packets */
1043 	ct->proto.tcp.last_index = index;
1044 	ct->proto.tcp.last_dir = dir;
1045 
1046 	pr_debug("tcp_conntracks: ");
1047 	nf_ct_dump_tuple(tuple);
1048 	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1049 		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1050 		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1051 		 old_state, new_state);
1052 
1053 	ct->proto.tcp.state = new_state;
1054 	if (old_state != new_state
1055 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1056 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1057 
1058 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1059 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1060 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1061 	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1062 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1063 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1064 		timeout = timeouts[TCP_CONNTRACK_UNACK];
1065 	else
1066 		timeout = timeouts[new_state];
1067 	spin_unlock_bh(&ct->lock);
1068 
1069 	if (new_state != old_state)
1070 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1071 
1072 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1073 		/* If only reply is a RST, we can consider ourselves not to
1074 		   have an established connection: this is a fairly common
1075 		   problem case, so we can delete the conntrack
1076 		   immediately.  --RR */
1077 		if (th->rst) {
1078 			nf_ct_kill_acct(ct, ctinfo, skb);
1079 			return NF_ACCEPT;
1080 		}
1081 		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1082 		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1083 		 */
1084 		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1085 		    timeout > timeouts[TCP_CONNTRACK_UNACK])
1086 			timeout = timeouts[TCP_CONNTRACK_UNACK];
1087 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1088 		   && (old_state == TCP_CONNTRACK_SYN_RECV
1089 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1090 		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
		/* Set ASSURED if we see a valid ack in ESTABLISHED
		   after SYN_RECV or a valid answer for a picked up
		   connection. */
1094 		set_bit(IPS_ASSURED_BIT, &ct->status);
1095 		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1096 	}
1097 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1098 
1099 	return NF_ACCEPT;
1100 }
1101 
1102 /* Called when a new connection for this protocol found. */
1103 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1104 		    unsigned int dataoff, unsigned int *timeouts)
1105 {
1106 	enum tcp_conntrack new_state;
1107 	const struct tcphdr *th;
1108 	struct tcphdr _tcph;
1109 	struct net *net = nf_ct_net(ct);
1110 	struct nf_tcp_net *tn = tcp_pernet(net);
1111 	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1112 	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1113 
1114 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1115 	BUG_ON(th == NULL);
1116 
1117 	/* Don't need lock here: this conntrack not in circulation yet */
1118 	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1119 
1120 	/* Invalid: delete conntrack */
1121 	if (new_state >= TCP_CONNTRACK_MAX) {
1122 		pr_debug("nf_ct_tcp: invalid new deleting.\n");
1123 		return false;
1124 	}
1125 
1126 	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1127 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1128 		/* SYN packet */
1129 		ct->proto.tcp.seen[0].td_end =
1130 			segment_seq_plus_len(ntohl(th->seq), skb->len,
1131 					     dataoff, th);
1132 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1133 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1134 			ct->proto.tcp.seen[0].td_maxwin = 1;
1135 		ct->proto.tcp.seen[0].td_maxend =
1136 			ct->proto.tcp.seen[0].td_end;
1137 
1138 		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1139 	} else if (tn->tcp_loose == 0) {
1140 		/* Don't try to pick up connections. */
1141 		return false;
1142 	} else {
1143 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1144 		/*
1145 		 * We are in the middle of a connection,
1146 		 * its history is lost for us.
1147 		 * Let's try to use the data from the packet.
1148 		 */
1149 		ct->proto.tcp.seen[0].td_end =
1150 			segment_seq_plus_len(ntohl(th->seq), skb->len,
1151 					     dataoff, th);
1152 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1153 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1154 			ct->proto.tcp.seen[0].td_maxwin = 1;
1155 		ct->proto.tcp.seen[0].td_maxend =
1156 			ct->proto.tcp.seen[0].td_end +
1157 			ct->proto.tcp.seen[0].td_maxwin;
1158 
1159 		/* We assume SACK and liberal window checking to handle
1160 		 * window scaling */
1161 		ct->proto.tcp.seen[0].flags =
1162 		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1163 					      IP_CT_TCP_FLAG_BE_LIBERAL;
1164 	}
1165 
1166 	/* tcp_packet will set them */
1167 	ct->proto.tcp.last_index = TCP_NONE_SET;
1168 
1169 	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1170 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1171 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
1172 		 sender->td_scale,
1173 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1174 		 receiver->td_scale);
1175 	return true;
1176 }
1177 
1178 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1179 
1180 #include <linux/netfilter/nfnetlink.h>
1181 #include <linux/netfilter/nfnetlink_conntrack.h>
1182 
1183 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1184 			 struct nf_conn *ct)
1185 {
1186 	struct nlattr *nest_parms;
1187 	struct nf_ct_tcp_flags tmp = {};
1188 
1189 	spin_lock_bh(&ct->lock);
1190 	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1191 	if (!nest_parms)
1192 		goto nla_put_failure;
1193 
1194 	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1195 	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1196 		       ct->proto.tcp.seen[0].td_scale) ||
1197 	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1198 		       ct->proto.tcp.seen[1].td_scale))
1199 		goto nla_put_failure;
1200 
1201 	tmp.flags = ct->proto.tcp.seen[0].flags;
1202 	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1203 		    sizeof(struct nf_ct_tcp_flags), &tmp))
1204 		goto nla_put_failure;
1205 
1206 	tmp.flags = ct->proto.tcp.seen[1].flags;
1207 	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1208 		    sizeof(struct nf_ct_tcp_flags), &tmp))
1209 		goto nla_put_failure;
1210 	spin_unlock_bh(&ct->lock);
1211 
1212 	nla_nest_end(skb, nest_parms);
1213 
1214 	return 0;
1215 
1216 nla_put_failure:
1217 	spin_unlock_bh(&ct->lock);
1218 	return -1;
1219 }
1220 
/*
 * Netlink validation policy for the attributes nested inside
 * CTA_PROTOINFO_TCP.  nlattr_to_tcp() hands it to nla_parse_nested() and
 * tcp_nlattr_size() feeds it to nla_policy_len().
 */
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
	/* The flag attributes carry a struct nf_ct_tcp_flags (flags + mask) */
	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
};
1228 
1229 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1230 {
1231 	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1232 	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1233 	int err;
1234 
1235 	/* updates could not contain anything about the private
1236 	 * protocol info, in that case skip the parsing */
1237 	if (!pattr)
1238 		return 0;
1239 
1240 	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1241 	if (err < 0)
1242 		return err;
1243 
1244 	if (tb[CTA_PROTOINFO_TCP_STATE] &&
1245 	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1246 		return -EINVAL;
1247 
1248 	spin_lock_bh(&ct->lock);
1249 	if (tb[CTA_PROTOINFO_TCP_STATE])
1250 		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1251 
1252 	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1253 		struct nf_ct_tcp_flags *attr =
1254 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1255 		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1256 		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1257 	}
1258 
1259 	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1260 		struct nf_ct_tcp_flags *attr =
1261 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1262 		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1263 		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1264 	}
1265 
1266 	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1267 	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1268 	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1269 	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1270 		ct->proto.tcp.seen[0].td_scale =
1271 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1272 		ct->proto.tcp.seen[1].td_scale =
1273 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1274 	}
1275 	spin_unlock_bh(&ct->lock);
1276 
1277 	return 0;
1278 }
1279 
1280 static int tcp_nlattr_size(void)
1281 {
1282 	return nla_total_size(0)	   /* CTA_PROTOINFO_TCP */
1283 		+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1284 }
1285 
1286 static int tcp_nlattr_tuple_size(void)
1287 {
1288 	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1289 }
1290 #endif
1291 
1292 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1293 
1294 #include <linux/netfilter/nfnetlink.h>
1295 #include <linux/netfilter/nfnetlink_cttimeout.h>
1296 
1297 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1298 				     struct net *net, void *data)
1299 {
1300 	unsigned int *timeouts = data;
1301 	struct nf_tcp_net *tn = tcp_pernet(net);
1302 	int i;
1303 
1304 	/* set default TCP timeouts. */
1305 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1306 		timeouts[i] = tn->timeouts[i];
1307 
1308 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1309 		timeouts[TCP_CONNTRACK_SYN_SENT] =
1310 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1311 	}
1312 	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1313 		timeouts[TCP_CONNTRACK_SYN_RECV] =
1314 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1315 	}
1316 	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1317 		timeouts[TCP_CONNTRACK_ESTABLISHED] =
1318 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1319 	}
1320 	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1321 		timeouts[TCP_CONNTRACK_FIN_WAIT] =
1322 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1323 	}
1324 	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1325 		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1326 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1327 	}
1328 	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1329 		timeouts[TCP_CONNTRACK_LAST_ACK] =
1330 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1331 	}
1332 	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1333 		timeouts[TCP_CONNTRACK_TIME_WAIT] =
1334 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1335 	}
1336 	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1337 		timeouts[TCP_CONNTRACK_CLOSE] =
1338 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1339 	}
1340 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1341 		timeouts[TCP_CONNTRACK_SYN_SENT2] =
1342 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1343 	}
1344 	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1345 		timeouts[TCP_CONNTRACK_RETRANS] =
1346 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1347 	}
1348 	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1349 		timeouts[TCP_CONNTRACK_UNACK] =
1350 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1351 	}
1352 	return 0;
1353 }
1354 
1355 static int
1356 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1357 {
1358 	const unsigned int *timeouts = data;
1359 
1360 	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1361 			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1362 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1363 			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1364 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1365 			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1366 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1367 			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1368 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1369 			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1370 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1371 			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1372 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1373 			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1374 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1375 			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1376 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1377 			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1378 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1379 			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1380 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1381 			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1382 		goto nla_put_failure;
1383 	return 0;
1384 
1385 nla_put_failure:
1386 	return -ENOSPC;
1387 }
1388 
/*
 * Netlink policy for the per-state TCP timeout attributes: every listed
 * attribute is declared as NLA_U32.  Both l4proto definitions in this file
 * reference it from their ctnl_timeout section.  The values are read with
 * nla_get_be32() and multiplied by HZ in tcp_timeout_nlattr_to_obj(), and
 * divided by HZ again when dumped by tcp_timeout_obj_to_nlattr().
 */
static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
};
1402 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1403 
1404 #ifdef CONFIG_SYSCTL
/*
 * Template for the TCP conntrack sysctl entries.
 *
 * No .data pointer is set here.  tcp_kmemdup_sysctl_table() duplicates the
 * table and fills in .data BY POSITION, so the order of the entries must
 * stay in step with the indices used there.  The bracketed number on each
 * entry is that index, followed by the field it gets bound to.
 * There is no entry for timeouts[TCP_CONNTRACK_SYN_SENT2] in this table.
 */
static struct ctl_table tcp_sysctl_table[] = {
	/* [0] timeouts[TCP_CONNTRACK_SYN_SENT] */
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [1] timeouts[TCP_CONNTRACK_SYN_RECV] */
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [2] timeouts[TCP_CONNTRACK_ESTABLISHED] */
	{
		.procname	= "nf_conntrack_tcp_timeout_established",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [3] timeouts[TCP_CONNTRACK_FIN_WAIT] */
	{
		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [4] timeouts[TCP_CONNTRACK_CLOSE_WAIT] */
	{
		.procname	= "nf_conntrack_tcp_timeout_close_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [5] timeouts[TCP_CONNTRACK_LAST_ACK] */
	{
		.procname	= "nf_conntrack_tcp_timeout_last_ack",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [6] timeouts[TCP_CONNTRACK_TIME_WAIT] */
	{
		.procname	= "nf_conntrack_tcp_timeout_time_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [7] timeouts[TCP_CONNTRACK_CLOSE] */
	{
		.procname	= "nf_conntrack_tcp_timeout_close",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [8] timeouts[TCP_CONNTRACK_RETRANS] (a timeout, unlike [12]) */
	{
		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [9] timeouts[TCP_CONNTRACK_UNACK] */
	{
		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [10] tcp_loose */
	{
		.procname	= "nf_conntrack_tcp_loose",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* [11] tcp_be_liberal */
	{
		.procname       = "nf_conntrack_tcp_be_liberal",
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
	/* [12] tcp_max_retrans (a packet count, handled by proc_dointvec) */
	{
		.procname	= "nf_conntrack_tcp_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* empty entry terminates the table */
	{ }
};
1486 
1487 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
/*
 * Template for the compat ("ip_conntrack_*") TCP sysctl entries.
 *
 * No .data pointer is set here.  tcp_kmemdup_compat_sysctl_table()
 * duplicates the table and fills in .data BY POSITION, so the order of the
 * entries must stay in step with the indices used there.  The bracketed
 * number on each entry is that index, followed by the field it gets bound
 * to.  There is no entry for timeouts[TCP_CONNTRACK_UNACK] in this table.
 */
static struct ctl_table tcp_compat_sysctl_table[] = {
	/* [0] timeouts[TCP_CONNTRACK_SYN_SENT] */
	{
		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [1] timeouts[TCP_CONNTRACK_SYN_SENT2] */
	{
		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [2] timeouts[TCP_CONNTRACK_SYN_RECV] */
	{
		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [3] timeouts[TCP_CONNTRACK_ESTABLISHED] */
	{
		.procname	= "ip_conntrack_tcp_timeout_established",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [4] timeouts[TCP_CONNTRACK_FIN_WAIT] */
	{
		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [5] timeouts[TCP_CONNTRACK_CLOSE_WAIT] */
	{
		.procname	= "ip_conntrack_tcp_timeout_close_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [6] timeouts[TCP_CONNTRACK_LAST_ACK] */
	{
		.procname	= "ip_conntrack_tcp_timeout_last_ack",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [7] timeouts[TCP_CONNTRACK_TIME_WAIT] */
	{
		.procname	= "ip_conntrack_tcp_timeout_time_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [8] timeouts[TCP_CONNTRACK_CLOSE] */
	{
		.procname	= "ip_conntrack_tcp_timeout_close",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [9] timeouts[TCP_CONNTRACK_RETRANS] (a timeout, unlike [12]) */
	{
		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	/* [10] tcp_loose */
	{
		.procname	= "ip_conntrack_tcp_loose",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* [11] tcp_be_liberal */
	{
		.procname	= "ip_conntrack_tcp_be_liberal",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* [12] tcp_max_retrans (a packet count, handled by proc_dointvec) */
	{
		.procname	= "ip_conntrack_tcp_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* empty entry terminates the table */
	{ }
};
1569 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1570 #endif /* CONFIG_SYSCTL */
1571 
/*
 * Give pn its own copy of tcp_sysctl_table and point every entry's .data
 * at the matching field of tn.
 *
 * Does nothing if pn already has a table.  Returns 0 on success (and
 * always when CONFIG_SYSCTL is off), -ENOMEM if the copy cannot be
 * allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;
	unsigned int idx = 0;

	/* A table was already set up for this pn */
	if (pn->ctl_table)
		return 0;

	pn->ctl_table = kmemdup(tcp_sysctl_table,
				sizeof(tcp_sysctl_table),
				GFP_KERNEL);
	table = pn->ctl_table;
	if (!table)
		return -ENOMEM;

	/*
	 * Entries are bound by position: the sequence below must follow
	 * the order of the entries in tcp_sysctl_table[].
	 */
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[idx++].data = &tn->tcp_loose;
	table[idx++].data = &tn->tcp_be_liberal;
	table[idx++].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1601 
/*
 * Give pn its own copy of tcp_compat_sysctl_table and point every entry's
 * .data at the matching field of tn.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.  Compiles
 * to a plain "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are enabled.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	struct ctl_table *table;
	unsigned int idx = 0;

	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
				       sizeof(tcp_compat_sysctl_table),
				       GFP_KERNEL);
	table = pn->ctl_compat_table;
	if (!table)
		return -ENOMEM;

	/*
	 * Entries are bound by position: the sequence below must follow
	 * the order of the entries in tcp_compat_sysctl_table[].
	 */
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[idx++].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[idx++].data = &tn->tcp_loose;
	table[idx++].data = &tn->tcp_be_liberal;
	table[idx++].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1630 
1631 static int tcp_init_net(struct net *net, u_int16_t proto)
1632 {
1633 	int ret;
1634 	struct nf_tcp_net *tn = tcp_pernet(net);
1635 	struct nf_proto_net *pn = &tn->pn;
1636 
1637 	if (!pn->users) {
1638 		int i;
1639 
1640 		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1641 			tn->timeouts[i] = tcp_timeouts[i];
1642 
1643 		tn->tcp_loose = nf_ct_tcp_loose;
1644 		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1645 		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1646 	}
1647 
1648 	if (proto == AF_INET) {
1649 		ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1650 		if (ret < 0)
1651 			return ret;
1652 
1653 		ret = tcp_kmemdup_sysctl_table(pn, tn);
1654 		if (ret < 0)
1655 			nf_ct_kfree_compat_sysctl_table(pn);
1656 	} else
1657 		ret = tcp_kmemdup_sysctl_table(pn, tn);
1658 
1659 	return ret;
1660 }
1661 
1662 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1663 {
1664 	return &net->ct.nf_ct_proto.tcp.pn;
1665 }
1666 
/*
 * TCP layer 4 tracker definition for IPv4 (.l3proto = PF_INET).
 * Apart from .l3proto it is initialised exactly like
 * nf_conntrack_l4proto_tcp6.  The netlink and netlink-timeout callbacks
 * are only present when the respective config options are enabled.
 * Exported (GPL only) for use outside this file.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.get_timeouts		= tcp_get_timeouts,
	.new 			= tcp_new,
	.error			= tcp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		/* one unsigned int per timeout slot */
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
	.init_net		= tcp_init_net,
	.get_net_proto		= tcp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1703 
/*
 * TCP layer 4 tracker definition for IPv6 (.l3proto = PF_INET6).
 * Apart from .l3proto it is initialised exactly like
 * nf_conntrack_l4proto_tcp4.  The netlink and netlink-timeout callbacks
 * are only present when the respective config options are enabled.
 * Exported (GPL only) for use outside this file.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
{
	.l3proto		= PF_INET6,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.get_timeouts		= tcp_get_timeouts,
	.new 			= tcp_new,
	.error			= tcp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		/* one unsigned int per timeout slot */
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
	.init_net		= tcp_init_net,
	.get_net_proto		= tcp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1740