1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/ip.h>
4 #include <linux/sctp.h>
5 #include <net/ip.h>
6 #include <net/ip6_checksum.h>
7 #include <linux/netfilter.h>
8 #include <linux/netfilter_ipv4.h>
9 #include <net/sctp/checksum.h>
10 #include <net/ip_vs.h>
11 
/*
 * Forward declaration: sctp_csum_check() is defined further down in this
 * file but is already called by the SNAT/DNAT handlers below.
 */
static int
sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
14 
15 static int
sctp_conn_schedule(struct netns_ipvs * ipvs,int af,struct sk_buff * skb,struct ip_vs_proto_data * pd,int * verdict,struct ip_vs_conn ** cpp,struct ip_vs_iphdr * iph)16 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
17 		   struct ip_vs_proto_data *pd,
18 		   int *verdict, struct ip_vs_conn **cpp,
19 		   struct ip_vs_iphdr *iph)
20 {
21 	struct ip_vs_service *svc;
22 	struct sctp_chunkhdr _schunkh, *sch;
23 	struct sctphdr *sh, _sctph;
24 	__be16 _ports[2], *ports = NULL;
25 
26 	if (likely(!ip_vs_iph_icmp(iph))) {
27 		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
28 		if (sh) {
29 			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
30 						 sizeof(_schunkh), &_schunkh);
31 			if (sch) {
32 				if (sch->type == SCTP_CID_ABORT ||
33 				    !(sysctl_sloppy_sctp(ipvs) ||
34 				      sch->type == SCTP_CID_INIT))
35 					return 1;
36 				ports = &sh->source;
37 			}
38 		}
39 	} else {
40 		ports = skb_header_pointer(
41 			skb, iph->len, sizeof(_ports), &_ports);
42 	}
43 
44 	if (!ports) {
45 		*verdict = NF_DROP;
46 		return 0;
47 	}
48 
49 	if (likely(!ip_vs_iph_inverse(iph)))
50 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
51 					 &iph->daddr, ports[1]);
52 	else
53 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
54 					 &iph->saddr, ports[0]);
55 	if (svc) {
56 		int ignored;
57 
58 		if (ip_vs_todrop(ipvs)) {
59 			/*
60 			 * It seems that we are very loaded.
61 			 * We have to drop this packet :(
62 			 */
63 			*verdict = NF_DROP;
64 			return 0;
65 		}
66 		/*
67 		 * Let the virtual server select a real server for the
68 		 * incoming connection, and create a connection entry.
69 		 */
70 		*cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
71 		if (!*cpp && ignored <= 0) {
72 			if (!ignored)
73 				*verdict = ip_vs_leave(svc, skb, pd, iph);
74 			else
75 				*verdict = NF_DROP;
76 			return 0;
77 		}
78 	}
79 	/* NF_ACCEPT */
80 	return 1;
81 }
82 
sctp_nat_csum(struct sk_buff * skb,struct sctphdr * sctph,unsigned int sctphoff)83 static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
84 			  unsigned int sctphoff)
85 {
86 	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
87 	skb->ip_summed = CHECKSUM_UNNECESSARY;
88 }
89 
90 static int
sctp_snat_handler(struct sk_buff * skb,struct ip_vs_protocol * pp,struct ip_vs_conn * cp,struct ip_vs_iphdr * iph)91 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
92 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
93 {
94 	struct sctphdr *sctph;
95 	unsigned int sctphoff = iph->len;
96 	bool payload_csum = false;
97 
98 #ifdef CONFIG_IP_VS_IPV6
99 	if (cp->af == AF_INET6 && iph->fragoffs)
100 		return 1;
101 #endif
102 
103 	/* csum_check requires unshared skb */
104 	if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
105 		return 0;
106 
107 	if (unlikely(cp->app != NULL)) {
108 		int ret;
109 
110 		/* Some checks before mangling */
111 		if (!sctp_csum_check(cp->af, skb, pp))
112 			return 0;
113 
114 		/* Call application helper if needed */
115 		ret = ip_vs_app_pkt_out(cp, skb, iph);
116 		if (ret == 0)
117 			return 0;
118 		/* ret=2: csum update is needed after payload mangling */
119 		if (ret == 2)
120 			payload_csum = true;
121 	}
122 
123 	sctph = (void *) skb_network_header(skb) + sctphoff;
124 
125 	/* Only update csum if we really have to */
126 	if (sctph->source != cp->vport || payload_csum ||
127 	    skb->ip_summed == CHECKSUM_PARTIAL) {
128 		sctph->source = cp->vport;
129 		if (!skb_is_gso(skb))
130 			sctp_nat_csum(skb, sctph, sctphoff);
131 	} else {
132 		skb->ip_summed = CHECKSUM_UNNECESSARY;
133 	}
134 
135 	return 1;
136 }
137 
138 static int
sctp_dnat_handler(struct sk_buff * skb,struct ip_vs_protocol * pp,struct ip_vs_conn * cp,struct ip_vs_iphdr * iph)139 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
140 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
141 {
142 	struct sctphdr *sctph;
143 	unsigned int sctphoff = iph->len;
144 	bool payload_csum = false;
145 
146 #ifdef CONFIG_IP_VS_IPV6
147 	if (cp->af == AF_INET6 && iph->fragoffs)
148 		return 1;
149 #endif
150 
151 	/* csum_check requires unshared skb */
152 	if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
153 		return 0;
154 
155 	if (unlikely(cp->app != NULL)) {
156 		int ret;
157 
158 		/* Some checks before mangling */
159 		if (!sctp_csum_check(cp->af, skb, pp))
160 			return 0;
161 
162 		/* Call application helper if needed */
163 		ret = ip_vs_app_pkt_in(cp, skb, iph);
164 		if (ret == 0)
165 			return 0;
166 		/* ret=2: csum update is needed after payload mangling */
167 		if (ret == 2)
168 			payload_csum = true;
169 	}
170 
171 	sctph = (void *) skb_network_header(skb) + sctphoff;
172 
173 	/* Only update csum if we really have to */
174 	if (sctph->dest != cp->dport || payload_csum ||
175 	    (skb->ip_summed == CHECKSUM_PARTIAL &&
176 	     !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
177 		sctph->dest = cp->dport;
178 		if (!skb_is_gso(skb))
179 			sctp_nat_csum(skb, sctph, sctphoff);
180 	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
181 		skb->ip_summed = CHECKSUM_UNNECESSARY;
182 	}
183 
184 	return 1;
185 }
186 
187 static int
sctp_csum_check(int af,struct sk_buff * skb,struct ip_vs_protocol * pp)188 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
189 {
190 	unsigned int sctphoff;
191 	struct sctphdr *sh;
192 	__le32 cmp, val;
193 
194 #ifdef CONFIG_IP_VS_IPV6
195 	if (af == AF_INET6)
196 		sctphoff = sizeof(struct ipv6hdr);
197 	else
198 #endif
199 		sctphoff = ip_hdrlen(skb);
200 
201 	sh = (struct sctphdr *)(skb->data + sctphoff);
202 	cmp = sh->checksum;
203 	val = sctp_compute_cksum(skb, sctphoff);
204 
205 	if (val != cmp) {
206 		/* CRC failure, dump it. */
207 		IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
208 				"Failed checksum for");
209 		return 0;
210 	}
211 	return 1;
212 }
213 
/*
 * Events fed into the sctp_states table.  Chunk types are translated to
 * these values through sctp_events[]; IP_VS_SCTP_EVENT_LAST is the number
 * of events and sizes the event dimension of sctp_states.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST
};
227 
228 /* RFC 2960, 3.2 Chunk Field Descriptions */
229 static __u8 sctp_events[] = {
230 	[SCTP_CID_DATA]			= IP_VS_SCTP_DATA,
231 	[SCTP_CID_INIT]			= IP_VS_SCTP_INIT,
232 	[SCTP_CID_INIT_ACK]		= IP_VS_SCTP_INIT_ACK,
233 	[SCTP_CID_SACK]			= IP_VS_SCTP_DATA,
234 	[SCTP_CID_HEARTBEAT]		= IP_VS_SCTP_DATA,
235 	[SCTP_CID_HEARTBEAT_ACK]	= IP_VS_SCTP_DATA,
236 	[SCTP_CID_ABORT]		= IP_VS_SCTP_ABORT,
237 	[SCTP_CID_SHUTDOWN]		= IP_VS_SCTP_SHUTDOWN,
238 	[SCTP_CID_SHUTDOWN_ACK]		= IP_VS_SCTP_SHUTDOWN_ACK,
239 	[SCTP_CID_ERROR]		= IP_VS_SCTP_ERROR,
240 	[SCTP_CID_COOKIE_ECHO]		= IP_VS_SCTP_COOKIE_ECHO,
241 	[SCTP_CID_COOKIE_ACK]		= IP_VS_SCTP_COOKIE_ACK,
242 	[SCTP_CID_ECN_ECNE]		= IP_VS_SCTP_DATA,
243 	[SCTP_CID_ECN_CWR]		= IP_VS_SCTP_DATA,
244 	[SCTP_CID_SHUTDOWN_COMPLETE]	= IP_VS_SCTP_SHUTDOWN_COMPLETE,
245 };
246 
247 /* SCTP States:
248  * See RFC 2960, 4. SCTP Association State Diagram
249  *
250  * New states (not in diagram):
251  * - INIT1 state: use shorter timeout for dropped INIT packets
252  * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
253  * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
254  *
255  * The states are as seen in real server. In the diagram, INIT1, INIT,
256  * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
257  *
258  * States as per packets from client (C) and server (S):
259  *
260  * Setup of client connection:
261  * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
262  * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
263  * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
264  * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
265  *
266  * Setup of server connection:
267  * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
268  * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
269  * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
270  */
271 
/* Two-letter shorthands for the connection states, used to keep the
 * rows of the sctp_states table readable.
 */
#define sNO IP_VS_SCTP_S_NONE
#define sI1 IP_VS_SCTP_S_INIT1
#define sIN IP_VS_SCTP_S_INIT
#define sCS IP_VS_SCTP_S_COOKIE_SENT
#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
#define sCW IP_VS_SCTP_S_COOKIE_WAIT
#define sCO IP_VS_SCTP_S_COOKIE
#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
#define sES IP_VS_SCTP_S_ESTABLISHED
#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
#define sRJ IP_VS_SCTP_S_REJECTED
#define sCL IP_VS_SCTP_S_CLOSED
286 
/*
 * State transition table, indexed as
 *	sctp_states[direction][event][current state] -> next state
 * One sub-table per direction (INPUT, OUTPUT, INPUT-ONLY), one row per
 * event in enum ipvs_sctp_event_t order, one column per current state
 * in the order given by the column header comments.
 */
static const __u8 sctp_states
	[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
	{ /* INPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* OUTPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* INPUT-ONLY */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
};
329 
/* Timeout used for most non-established states, in jiffies (61 * HZ) */
#define IP_VS_SCTP_MAX_RTO	((60 + 1) * HZ)

/* Timeout table[state] */
/*
 * Default per-state connection timeouts in jiffies.  __ip_vs_sctp_init()
 * hands this table to ip_vs_create_timeout_table() to set up the
 * per-netns table; set_sctp_state() falls back to it directly when no
 * protocol data is available.  INIT1 and REJECTED get a short timeout.
 */
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
	[IP_VS_SCTP_S_NONE]			= 2 * HZ,
	[IP_VS_SCTP_S_INIT1]			= (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_INIT]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_SENT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_REPLIED]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_WAIT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_ECHOED]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_ESTABLISHED]		= 15 * 60 * HZ,
	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_REJECTED]			= (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_CLOSED]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_LAST]			= 2 * HZ,
};
350 
351 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
352 	[IP_VS_SCTP_S_NONE]			= "NONE",
353 	[IP_VS_SCTP_S_INIT1]			= "INIT1",
354 	[IP_VS_SCTP_S_INIT]			= "INIT",
355 	[IP_VS_SCTP_S_COOKIE_SENT]		= "C-SENT",
356 	[IP_VS_SCTP_S_COOKIE_REPLIED]		= "C-REPLIED",
357 	[IP_VS_SCTP_S_COOKIE_WAIT]		= "C-WAIT",
358 	[IP_VS_SCTP_S_COOKIE]			= "COOKIE",
359 	[IP_VS_SCTP_S_COOKIE_ECHOED]		= "C-ECHOED",
360 	[IP_VS_SCTP_S_ESTABLISHED]		= "ESTABLISHED",
361 	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= "S-SENT",
362 	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= "S-RECEIVED",
363 	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= "S-ACK-SENT",
364 	[IP_VS_SCTP_S_REJECTED]			= "REJECTED",
365 	[IP_VS_SCTP_S_CLOSED]			= "CLOSED",
366 	[IP_VS_SCTP_S_LAST]			= "BUG!",
367 };
368 
369 
sctp_state_name(int state)370 static const char *sctp_state_name(int state)
371 {
372 	if (state >= IP_VS_SCTP_S_LAST)
373 		return "ERR!";
374 	if (sctp_state_name_table[state])
375 		return sctp_state_name_table[state];
376 	return "?";
377 }
378 
379 static inline void
set_sctp_state(struct ip_vs_proto_data * pd,struct ip_vs_conn * cp,int direction,const struct sk_buff * skb)380 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
381 		int direction, const struct sk_buff *skb)
382 {
383 	struct sctp_chunkhdr _sctpch, *sch;
384 	unsigned char chunk_type;
385 	int event, next_state;
386 	int ihl, cofs;
387 
388 #ifdef CONFIG_IP_VS_IPV6
389 	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
390 #else
391 	ihl = ip_hdrlen(skb);
392 #endif
393 
394 	cofs = ihl + sizeof(struct sctphdr);
395 	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
396 	if (sch == NULL)
397 		return;
398 
399 	chunk_type = sch->type;
400 	/*
401 	 * Section 3: Multiple chunks can be bundled into one SCTP packet
402 	 * up to the MTU size, except for the INIT, INIT ACK, and
403 	 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
404 	 * any other chunk in a packet.
405 	 *
406 	 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
407 	 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
408 	 * bundled with an ABORT, but they MUST be placed before the ABORT
409 	 * in the SCTP packet or they will be ignored by the receiver.
410 	 */
411 	if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
412 	    (sch->type == SCTP_CID_COOKIE_ACK)) {
413 		int clen = ntohs(sch->length);
414 
415 		if (clen >= sizeof(_sctpch)) {
416 			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
417 						 sizeof(_sctpch), &_sctpch);
418 			if (sch && sch->type == SCTP_CID_ABORT)
419 				chunk_type = sch->type;
420 		}
421 	}
422 
423 	event = (chunk_type < sizeof(sctp_events)) ?
424 		sctp_events[chunk_type] : IP_VS_SCTP_DATA;
425 
426 	/* Update direction to INPUT_ONLY if necessary
427 	 * or delete NO_OUTPUT flag if output packet detected
428 	 */
429 	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
430 		if (direction == IP_VS_DIR_OUTPUT)
431 			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
432 		else
433 			direction = IP_VS_DIR_INPUT_ONLY;
434 	}
435 
436 	next_state = sctp_states[direction][event][cp->state];
437 
438 	if (next_state != cp->state) {
439 		struct ip_vs_dest *dest = cp->dest;
440 
441 		IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
442 				"%s:%d state: %s->%s conn->refcnt:%d\n",
443 				pd->pp->name,
444 				((direction == IP_VS_DIR_OUTPUT) ?
445 				 "output " : "input "),
446 				IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
447 				ntohs(cp->dport),
448 				IP_VS_DBG_ADDR(cp->af, &cp->caddr),
449 				ntohs(cp->cport),
450 				sctp_state_name(cp->state),
451 				sctp_state_name(next_state),
452 				refcount_read(&cp->refcnt));
453 		if (dest) {
454 			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
455 				(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
456 				atomic_dec(&dest->activeconns);
457 				atomic_inc(&dest->inactconns);
458 				cp->flags |= IP_VS_CONN_F_INACTIVE;
459 			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
460 				   (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
461 				atomic_inc(&dest->activeconns);
462 				atomic_dec(&dest->inactconns);
463 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
464 			}
465 		}
466 		if (next_state == IP_VS_SCTP_S_ESTABLISHED)
467 			ip_vs_control_assure_ct(cp);
468 	}
469 	if (likely(pd))
470 		cp->timeout = pd->timeout_table[cp->state = next_state];
471 	else	/* What to do ? */
472 		cp->timeout = sctp_timeouts[cp->state = next_state];
473 }
474 
475 static void
sctp_state_transition(struct ip_vs_conn * cp,int direction,const struct sk_buff * skb,struct ip_vs_proto_data * pd)476 sctp_state_transition(struct ip_vs_conn *cp, int direction,
477 		const struct sk_buff *skb, struct ip_vs_proto_data *pd)
478 {
479 	spin_lock_bh(&cp->lock);
480 	set_sctp_state(pd, cp, direction, skb);
481 	spin_unlock_bh(&cp->lock);
482 }
483 
sctp_app_hashkey(__be16 port)484 static inline __u16 sctp_app_hashkey(__be16 port)
485 {
486 	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
487 		& SCTP_APP_TAB_MASK;
488 }
489 
sctp_register_app(struct netns_ipvs * ipvs,struct ip_vs_app * inc)490 static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
491 {
492 	struct ip_vs_app *i;
493 	__u16 hash;
494 	__be16 port = inc->port;
495 	int ret = 0;
496 	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
497 
498 	hash = sctp_app_hashkey(port);
499 
500 	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
501 		if (i->port == port) {
502 			ret = -EEXIST;
503 			goto out;
504 		}
505 	}
506 	list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
507 	atomic_inc(&pd->appcnt);
508 out:
509 
510 	return ret;
511 }
512 
sctp_unregister_app(struct netns_ipvs * ipvs,struct ip_vs_app * inc)513 static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
514 {
515 	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
516 
517 	atomic_dec(&pd->appcnt);
518 	list_del_rcu(&inc->p_list);
519 }
520 
sctp_app_conn_bind(struct ip_vs_conn * cp)521 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
522 {
523 	struct netns_ipvs *ipvs = cp->ipvs;
524 	int hash;
525 	struct ip_vs_app *inc;
526 	int result = 0;
527 
528 	/* Default binding: bind app only for NAT */
529 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
530 		return 0;
531 	/* Lookup application incarnations and bind the right one */
532 	hash = sctp_app_hashkey(cp->vport);
533 
534 	list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
535 		if (inc->port == cp->vport) {
536 			if (unlikely(!ip_vs_app_inc_get(inc)))
537 				break;
538 
539 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
540 					"%s:%u to app %s on port %u\n",
541 					__func__,
542 					IP_VS_DBG_ADDR(cp->af, &cp->caddr),
543 					ntohs(cp->cport),
544 					IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
545 					ntohs(cp->vport),
546 					inc->name, ntohs(inc->port));
547 			cp->app = inc;
548 			if (inc->init_conn)
549 				result = inc->init_conn(inc, cp);
550 			break;
551 		}
552 	}
553 
554 	return result;
555 }
556 
557 /* ---------------------------------------------
558  *   timeouts is netns related now.
559  * ---------------------------------------------
560  */
__ip_vs_sctp_init(struct netns_ipvs * ipvs,struct ip_vs_proto_data * pd)561 static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
562 {
563 	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
564 	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
565 							sizeof(sctp_timeouts));
566 	if (!pd->timeout_table)
567 		return -ENOMEM;
568 	return 0;
569 }
570 
__ip_vs_sctp_exit(struct netns_ipvs * ipvs,struct ip_vs_proto_data * pd)571 static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
572 {
573 	kfree(pd->timeout_table);
574 }
575 
/*
 * IPVS protocol descriptor for SCTP: wires the handlers defined in this
 * file (scheduling, NAT, state tracking, application binding) into the
 * protocol operations.  Connection lookup and packet debugging use
 * shared helpers defined outside this file.
 */
struct ip_vs_protocol ip_vs_protocol_sctp = {
	.name		= "SCTP",
	.protocol	= IPPROTO_SCTP,
	.num_states	= IP_VS_SCTP_S_LAST,
	.dont_defrag	= 0,
	.init		= NULL,
	.exit		= NULL,
	.init_netns	= __ip_vs_sctp_init,
	.exit_netns	= __ip_vs_sctp_exit,
	.register_app	= sctp_register_app,
	.unregister_app = sctp_unregister_app,
	.conn_schedule	= sctp_conn_schedule,
	.conn_in_get	= ip_vs_conn_in_get_proto,
	.conn_out_get	= ip_vs_conn_out_get_proto,
	.snat_handler	= sctp_snat_handler,
	.dnat_handler	= sctp_dnat_handler,
	.state_name	= sctp_state_name,
	.state_transition = sctp_state_transition,
	.app_conn_bind	= sctp_app_conn_bind,
	.debug_packet	= ip_vs_tcpudp_debug_packet,
	.timeout_change	= NULL,
};
598