1 #include <linux/kernel.h>
2 #include <linux/ip.h>
3 #include <linux/sctp.h>
4 #include <net/ip.h>
5 #include <net/ip6_checksum.h>
6 #include <linux/netfilter.h>
7 #include <linux/netfilter_ipv4.h>
8 #include <net/sctp/checksum.h>
9 #include <net/ip_vs.h>
10 
11 static int
12 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
13 		   struct ip_vs_proto_data *pd,
14 		   int *verdict, struct ip_vs_conn **cpp,
15 		   struct ip_vs_iphdr *iph)
16 {
17 	struct ip_vs_service *svc;
18 	sctp_chunkhdr_t _schunkh, *sch;
19 	sctp_sctphdr_t *sh, _sctph;
20 	__be16 _ports[2], *ports = NULL;
21 
22 	if (likely(!ip_vs_iph_icmp(iph))) {
23 		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
24 		if (sh) {
25 			sch = skb_header_pointer(
26 				skb, iph->len + sizeof(sctp_sctphdr_t),
27 				sizeof(_schunkh), &_schunkh);
28 			if (sch && (sch->type == SCTP_CID_INIT ||
29 				    sysctl_sloppy_sctp(ipvs)))
30 				ports = &sh->source;
31 		}
32 	} else {
33 		ports = skb_header_pointer(
34 			skb, iph->len, sizeof(_ports), &_ports);
35 	}
36 
37 	if (!ports) {
38 		*verdict = NF_DROP;
39 		return 0;
40 	}
41 
42 	rcu_read_lock();
43 	if (likely(!ip_vs_iph_inverse(iph)))
44 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
45 					 &iph->daddr, ports[1]);
46 	else
47 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
48 					 &iph->saddr, ports[0]);
49 	if (svc) {
50 		int ignored;
51 
52 		if (ip_vs_todrop(ipvs)) {
53 			/*
54 			 * It seems that we are very loaded.
55 			 * We have to drop this packet :(
56 			 */
57 			rcu_read_unlock();
58 			*verdict = NF_DROP;
59 			return 0;
60 		}
61 		/*
62 		 * Let the virtual server select a real server for the
63 		 * incoming connection, and create a connection entry.
64 		 */
65 		*cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
66 		if (!*cpp && ignored <= 0) {
67 			if (!ignored)
68 				*verdict = ip_vs_leave(svc, skb, pd, iph);
69 			else
70 				*verdict = NF_DROP;
71 			rcu_read_unlock();
72 			return 0;
73 		}
74 	}
75 	rcu_read_unlock();
76 	/* NF_ACCEPT */
77 	return 1;
78 }
79 
80 static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
81 			  unsigned int sctphoff)
82 {
83 	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
84 	skb->ip_summed = CHECKSUM_UNNECESSARY;
85 }
86 
87 static int
88 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
89 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
90 {
91 	sctp_sctphdr_t *sctph;
92 	unsigned int sctphoff = iph->len;
93 	bool payload_csum = false;
94 
95 #ifdef CONFIG_IP_VS_IPV6
96 	if (cp->af == AF_INET6 && iph->fragoffs)
97 		return 1;
98 #endif
99 
100 	/* csum_check requires unshared skb */
101 	if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
102 		return 0;
103 
104 	if (unlikely(cp->app != NULL)) {
105 		int ret;
106 
107 		/* Some checks before mangling */
108 		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
109 			return 0;
110 
111 		/* Call application helper if needed */
112 		ret = ip_vs_app_pkt_out(cp, skb);
113 		if (ret == 0)
114 			return 0;
115 		/* ret=2: csum update is needed after payload mangling */
116 		if (ret == 2)
117 			payload_csum = true;
118 	}
119 
120 	sctph = (void *) skb_network_header(skb) + sctphoff;
121 
122 	/* Only update csum if we really have to */
123 	if (sctph->source != cp->vport || payload_csum ||
124 	    skb->ip_summed == CHECKSUM_PARTIAL) {
125 		sctph->source = cp->vport;
126 		sctp_nat_csum(skb, sctph, sctphoff);
127 	} else {
128 		skb->ip_summed = CHECKSUM_UNNECESSARY;
129 	}
130 
131 	return 1;
132 }
133 
134 static int
135 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
136 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
137 {
138 	sctp_sctphdr_t *sctph;
139 	unsigned int sctphoff = iph->len;
140 	bool payload_csum = false;
141 
142 #ifdef CONFIG_IP_VS_IPV6
143 	if (cp->af == AF_INET6 && iph->fragoffs)
144 		return 1;
145 #endif
146 
147 	/* csum_check requires unshared skb */
148 	if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
149 		return 0;
150 
151 	if (unlikely(cp->app != NULL)) {
152 		int ret;
153 
154 		/* Some checks before mangling */
155 		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
156 			return 0;
157 
158 		/* Call application helper if needed */
159 		ret = ip_vs_app_pkt_in(cp, skb);
160 		if (ret == 0)
161 			return 0;
162 		/* ret=2: csum update is needed after payload mangling */
163 		if (ret == 2)
164 			payload_csum = true;
165 	}
166 
167 	sctph = (void *) skb_network_header(skb) + sctphoff;
168 
169 	/* Only update csum if we really have to */
170 	if (sctph->dest != cp->dport || payload_csum ||
171 	    (skb->ip_summed == CHECKSUM_PARTIAL &&
172 	     !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
173 		sctph->dest = cp->dport;
174 		sctp_nat_csum(skb, sctph, sctphoff);
175 	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
176 		skb->ip_summed = CHECKSUM_UNNECESSARY;
177 	}
178 
179 	return 1;
180 }
181 
182 static int
183 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
184 {
185 	unsigned int sctphoff;
186 	struct sctphdr *sh, _sctph;
187 	__le32 cmp, val;
188 
189 #ifdef CONFIG_IP_VS_IPV6
190 	if (af == AF_INET6)
191 		sctphoff = sizeof(struct ipv6hdr);
192 	else
193 #endif
194 		sctphoff = ip_hdrlen(skb);
195 
196 	sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
197 	if (sh == NULL)
198 		return 0;
199 
200 	cmp = sh->checksum;
201 	val = sctp_compute_cksum(skb, sctphoff);
202 
203 	if (val != cmp) {
204 		/* CRC failure, dump it. */
205 		IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
206 				"Failed checksum for");
207 		return 0;
208 	}
209 	return 1;
210 }
211 
/*
 * Events fed into the SCTP state table.  The numeric values are used as the
 * middle index of sctp_states[][][], so they must stay dense and start at 0.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA			= 0,	/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT			= 1,
	IP_VS_SCTP_INIT_ACK		= 2,
	IP_VS_SCTP_COOKIE_ECHO		= 3,
	IP_VS_SCTP_COOKIE_ACK		= 4,
	IP_VS_SCTP_SHUTDOWN		= 5,
	IP_VS_SCTP_SHUTDOWN_ACK		= 6,
	IP_VS_SCTP_SHUTDOWN_COMPLETE	= 7,
	IP_VS_SCTP_ERROR		= 8,
	IP_VS_SCTP_ABORT		= 9,
	IP_VS_SCTP_EVENT_LAST		= 10	/* number of events */
};
225 
226 /* RFC 2960, 3.2 Chunk Field Descriptions */
227 static __u8 sctp_events[] = {
228 	[SCTP_CID_DATA]			= IP_VS_SCTP_DATA,
229 	[SCTP_CID_INIT]			= IP_VS_SCTP_INIT,
230 	[SCTP_CID_INIT_ACK]		= IP_VS_SCTP_INIT_ACK,
231 	[SCTP_CID_SACK]			= IP_VS_SCTP_DATA,
232 	[SCTP_CID_HEARTBEAT]		= IP_VS_SCTP_DATA,
233 	[SCTP_CID_HEARTBEAT_ACK]	= IP_VS_SCTP_DATA,
234 	[SCTP_CID_ABORT]		= IP_VS_SCTP_ABORT,
235 	[SCTP_CID_SHUTDOWN]		= IP_VS_SCTP_SHUTDOWN,
236 	[SCTP_CID_SHUTDOWN_ACK]		= IP_VS_SCTP_SHUTDOWN_ACK,
237 	[SCTP_CID_ERROR]		= IP_VS_SCTP_ERROR,
238 	[SCTP_CID_COOKIE_ECHO]		= IP_VS_SCTP_COOKIE_ECHO,
239 	[SCTP_CID_COOKIE_ACK]		= IP_VS_SCTP_COOKIE_ACK,
240 	[SCTP_CID_ECN_ECNE]		= IP_VS_SCTP_DATA,
241 	[SCTP_CID_ECN_CWR]		= IP_VS_SCTP_DATA,
242 	[SCTP_CID_SHUTDOWN_COMPLETE]	= IP_VS_SCTP_SHUTDOWN_COMPLETE,
243 };
244 
245 /* SCTP States:
246  * See RFC 2960, 4. SCTP Association State Diagram
247  *
248  * New states (not in diagram):
249  * - INIT1 state: use shorter timeout for dropped INIT packets
250  * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
251  * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
252  *
253  * The states are as seen in real server. In the diagram, INIT1, INIT,
254  * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
255  *
256  * States as per packets from client (C) and server (S):
257  *
258  * Setup of client connection:
259  * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
260  * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
261  * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
262  * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
263  *
264  * Setup of server connection:
265  * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
266  * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
267  * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
268  */
269 
270 #define sNO IP_VS_SCTP_S_NONE
271 #define sI1 IP_VS_SCTP_S_INIT1
272 #define sIN IP_VS_SCTP_S_INIT
273 #define sCS IP_VS_SCTP_S_COOKIE_SENT
274 #define sCR IP_VS_SCTP_S_COOKIE_REPLIED
275 #define sCW IP_VS_SCTP_S_COOKIE_WAIT
276 #define sCO IP_VS_SCTP_S_COOKIE
277 #define sCE IP_VS_SCTP_S_COOKIE_ECHOED
278 #define sES IP_VS_SCTP_S_ESTABLISHED
279 #define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
280 #define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
281 #define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
282 #define sRJ IP_VS_SCTP_S_REJECTED
283 #define sCL IP_VS_SCTP_S_CLOSED
284 
285 static const __u8 sctp_states
286 	[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
287 	{ /* INPUT */
288 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
289 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
290 /* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
291 /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
292 /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
293 /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
294 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
295 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
296 /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
297 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
298 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
299 	},
300 	{ /* OUTPUT */
301 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
302 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
303 /* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
304 /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
305 /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
306 /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
307 /* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
308 /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
309 /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
310 /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
311 /* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
312 	},
313 	{ /* INPUT-ONLY */
314 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
315 /* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
316 /* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
317 /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
318 /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
319 /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
320 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
321 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
322 /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
323 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
324 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
325 	},
326 };
327 
328 #define IP_VS_SCTP_MAX_RTO	((60 + 1) * HZ)
329 
330 /* Timeout table[state] */
331 static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
332 	[IP_VS_SCTP_S_NONE]			= 2 * HZ,
333 	[IP_VS_SCTP_S_INIT1]			= (0 + 3 + 1) * HZ,
334 	[IP_VS_SCTP_S_INIT]			= IP_VS_SCTP_MAX_RTO,
335 	[IP_VS_SCTP_S_COOKIE_SENT]		= IP_VS_SCTP_MAX_RTO,
336 	[IP_VS_SCTP_S_COOKIE_REPLIED]		= IP_VS_SCTP_MAX_RTO,
337 	[IP_VS_SCTP_S_COOKIE_WAIT]		= IP_VS_SCTP_MAX_RTO,
338 	[IP_VS_SCTP_S_COOKIE]			= IP_VS_SCTP_MAX_RTO,
339 	[IP_VS_SCTP_S_COOKIE_ECHOED]		= IP_VS_SCTP_MAX_RTO,
340 	[IP_VS_SCTP_S_ESTABLISHED]		= 15 * 60 * HZ,
341 	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= IP_VS_SCTP_MAX_RTO,
342 	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= IP_VS_SCTP_MAX_RTO,
343 	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= IP_VS_SCTP_MAX_RTO,
344 	[IP_VS_SCTP_S_REJECTED]			= (0 + 3 + 1) * HZ,
345 	[IP_VS_SCTP_S_CLOSED]			= IP_VS_SCTP_MAX_RTO,
346 	[IP_VS_SCTP_S_LAST]			= 2 * HZ,
347 };
348 
349 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
350 	[IP_VS_SCTP_S_NONE]			= "NONE",
351 	[IP_VS_SCTP_S_INIT1]			= "INIT1",
352 	[IP_VS_SCTP_S_INIT]			= "INIT",
353 	[IP_VS_SCTP_S_COOKIE_SENT]		= "C-SENT",
354 	[IP_VS_SCTP_S_COOKIE_REPLIED]		= "C-REPLIED",
355 	[IP_VS_SCTP_S_COOKIE_WAIT]		= "C-WAIT",
356 	[IP_VS_SCTP_S_COOKIE]			= "COOKIE",
357 	[IP_VS_SCTP_S_COOKIE_ECHOED]		= "C-ECHOED",
358 	[IP_VS_SCTP_S_ESTABLISHED]		= "ESTABLISHED",
359 	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= "S-SENT",
360 	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= "S-RECEIVED",
361 	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= "S-ACK-SENT",
362 	[IP_VS_SCTP_S_REJECTED]			= "REJECTED",
363 	[IP_VS_SCTP_S_CLOSED]			= "CLOSED",
364 	[IP_VS_SCTP_S_LAST]			= "BUG!",
365 };
366 
367 
368 static const char *sctp_state_name(int state)
369 {
370 	if (state >= IP_VS_SCTP_S_LAST)
371 		return "ERR!";
372 	if (sctp_state_name_table[state])
373 		return sctp_state_name_table[state];
374 	return "?";
375 }
376 
377 static inline void
378 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
379 		int direction, const struct sk_buff *skb)
380 {
381 	sctp_chunkhdr_t _sctpch, *sch;
382 	unsigned char chunk_type;
383 	int event, next_state;
384 	int ihl, cofs;
385 
386 #ifdef CONFIG_IP_VS_IPV6
387 	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
388 #else
389 	ihl = ip_hdrlen(skb);
390 #endif
391 
392 	cofs = ihl + sizeof(sctp_sctphdr_t);
393 	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
394 	if (sch == NULL)
395 		return;
396 
397 	chunk_type = sch->type;
398 	/*
399 	 * Section 3: Multiple chunks can be bundled into one SCTP packet
400 	 * up to the MTU size, except for the INIT, INIT ACK, and
401 	 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
402 	 * any other chunk in a packet.
403 	 *
404 	 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
405 	 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
406 	 * bundled with an ABORT, but they MUST be placed before the ABORT
407 	 * in the SCTP packet or they will be ignored by the receiver.
408 	 */
409 	if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
410 	    (sch->type == SCTP_CID_COOKIE_ACK)) {
411 		int clen = ntohs(sch->length);
412 
413 		if (clen >= sizeof(sctp_chunkhdr_t)) {
414 			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
415 						 sizeof(_sctpch), &_sctpch);
416 			if (sch && sch->type == SCTP_CID_ABORT)
417 				chunk_type = sch->type;
418 		}
419 	}
420 
421 	event = (chunk_type < sizeof(sctp_events)) ?
422 		sctp_events[chunk_type] : IP_VS_SCTP_DATA;
423 
424 	/* Update direction to INPUT_ONLY if necessary
425 	 * or delete NO_OUTPUT flag if output packet detected
426 	 */
427 	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
428 		if (direction == IP_VS_DIR_OUTPUT)
429 			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
430 		else
431 			direction = IP_VS_DIR_INPUT_ONLY;
432 	}
433 
434 	next_state = sctp_states[direction][event][cp->state];
435 
436 	if (next_state != cp->state) {
437 		struct ip_vs_dest *dest = cp->dest;
438 
439 		IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
440 				"%s:%d state: %s->%s conn->refcnt:%d\n",
441 				pd->pp->name,
442 				((direction == IP_VS_DIR_OUTPUT) ?
443 				 "output " : "input "),
444 				IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
445 				ntohs(cp->dport),
446 				IP_VS_DBG_ADDR(cp->af, &cp->caddr),
447 				ntohs(cp->cport),
448 				sctp_state_name(cp->state),
449 				sctp_state_name(next_state),
450 				atomic_read(&cp->refcnt));
451 		if (dest) {
452 			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
453 				(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
454 				atomic_dec(&dest->activeconns);
455 				atomic_inc(&dest->inactconns);
456 				cp->flags |= IP_VS_CONN_F_INACTIVE;
457 			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
458 				   (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
459 				atomic_inc(&dest->activeconns);
460 				atomic_dec(&dest->inactconns);
461 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
462 			}
463 		}
464 	}
465 	if (likely(pd))
466 		cp->timeout = pd->timeout_table[cp->state = next_state];
467 	else	/* What to do ? */
468 		cp->timeout = sctp_timeouts[cp->state = next_state];
469 }
470 
471 static void
472 sctp_state_transition(struct ip_vs_conn *cp, int direction,
473 		const struct sk_buff *skb, struct ip_vs_proto_data *pd)
474 {
475 	spin_lock_bh(&cp->lock);
476 	set_sctp_state(pd, cp, direction, skb);
477 	spin_unlock_bh(&cp->lock);
478 }
479 
480 static inline __u16 sctp_app_hashkey(__be16 port)
481 {
482 	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
483 		& SCTP_APP_TAB_MASK;
484 }
485 
486 static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
487 {
488 	struct ip_vs_app *i;
489 	__u16 hash;
490 	__be16 port = inc->port;
491 	int ret = 0;
492 	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
493 
494 	hash = sctp_app_hashkey(port);
495 
496 	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
497 		if (i->port == port) {
498 			ret = -EEXIST;
499 			goto out;
500 		}
501 	}
502 	list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
503 	atomic_inc(&pd->appcnt);
504 out:
505 
506 	return ret;
507 }
508 
509 static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
510 {
511 	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
512 
513 	atomic_dec(&pd->appcnt);
514 	list_del_rcu(&inc->p_list);
515 }
516 
517 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
518 {
519 	struct netns_ipvs *ipvs = cp->ipvs;
520 	int hash;
521 	struct ip_vs_app *inc;
522 	int result = 0;
523 
524 	/* Default binding: bind app only for NAT */
525 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
526 		return 0;
527 	/* Lookup application incarnations and bind the right one */
528 	hash = sctp_app_hashkey(cp->vport);
529 
530 	rcu_read_lock();
531 	list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
532 		if (inc->port == cp->vport) {
533 			if (unlikely(!ip_vs_app_inc_get(inc)))
534 				break;
535 			rcu_read_unlock();
536 
537 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
538 					"%s:%u to app %s on port %u\n",
539 					__func__,
540 					IP_VS_DBG_ADDR(cp->af, &cp->caddr),
541 					ntohs(cp->cport),
542 					IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
543 					ntohs(cp->vport),
544 					inc->name, ntohs(inc->port));
545 			cp->app = inc;
546 			if (inc->init_conn)
547 				result = inc->init_conn(inc, cp);
548 			goto out;
549 		}
550 	}
551 	rcu_read_unlock();
552 out:
553 	return result;
554 }
555 
/* ---------------------------------------------
 *   Timeouts are per network namespace now.
 * ---------------------------------------------
 */
560 static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
561 {
562 	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
563 	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
564 							sizeof(sctp_timeouts));
565 	if (!pd->timeout_table)
566 		return -ENOMEM;
567 	return 0;
568 }
569 
570 static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
571 {
572 	kfree(pd->timeout_table);
573 }
574 
575 struct ip_vs_protocol ip_vs_protocol_sctp = {
576 	.name		= "SCTP",
577 	.protocol	= IPPROTO_SCTP,
578 	.num_states	= IP_VS_SCTP_S_LAST,
579 	.dont_defrag	= 0,
580 	.init		= NULL,
581 	.exit		= NULL,
582 	.init_netns	= __ip_vs_sctp_init,
583 	.exit_netns	= __ip_vs_sctp_exit,
584 	.register_app	= sctp_register_app,
585 	.unregister_app = sctp_unregister_app,
586 	.conn_schedule	= sctp_conn_schedule,
587 	.conn_in_get	= ip_vs_conn_in_get_proto,
588 	.conn_out_get	= ip_vs_conn_out_get_proto,
589 	.snat_handler	= sctp_snat_handler,
590 	.dnat_handler	= sctp_dnat_handler,
591 	.csum_check	= sctp_csum_check,
592 	.state_name	= sctp_state_name,
593 	.state_transition = sctp_state_transition,
594 	.app_conn_bind	= sctp_app_conn_bind,
595 	.debug_packet	= ip_vs_tcpudp_debug_packet,
596 	.timeout_change	= NULL,
597 };
598