xref: /openbmc/linux/include/net/ip_vs.h (revision 63dca2c0b0e7a92cb39d1b1ecefa32ffda201975)
1 /*
2  *      IP Virtual Server
3  *      data structure and functionality definitions
4  */
5 
6 #ifndef _NET_IP_VS_H
7 #define _NET_IP_VS_H
8 
9 #include <linux/ip_vs.h>                /* definitions shared with userland */
10 
11 #include <asm/types.h>                  /* for __uXX types */
12 
13 #include <linux/list.h>                 /* for struct list_head */
14 #include <linux/spinlock.h>             /* for struct rwlock_t */
15 #include <linux/atomic.h>                 /* for struct atomic_t */
16 #include <linux/compiler.h>
17 #include <linux/timer.h>
18 #include <linux/bug.h>
19 
20 #include <net/checksum.h>
21 #include <linux/netfilter.h>		/* for union nf_inet_addr */
22 #include <linux/ip.h>
23 #include <linux/ipv6.h>			/* for struct ipv6hdr */
24 #include <net/ipv6.h>
25 #if IS_ENABLED(CONFIG_IPV6)
26 #include <linux/netfilter_ipv6/ip6_tables.h>
27 #endif
28 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
29 #include <net/netfilter/nf_conntrack.h>
30 #endif
31 #include <net/net_namespace.h>		/* Netw namespace */
32 
33 /*
34  * Generic access of ipvs struct
35  */
36 static inline struct netns_ipvs *net_ipvs(struct net* net)
37 {
38 	return net->ipvs;
39 }
40 /*
41  * Get net ptr from skb in traffic cases
42  * use skb_sknet when call is from userland (ioctl or netlink)
43  */
44 static inline struct net *skb_net(const struct sk_buff *skb)
45 {
46 #ifdef CONFIG_NET_NS
47 #ifdef CONFIG_IP_VS_DEBUG
48 	/*
49 	 * This is used for debug only.
50 	 * Start with the most likely hit
51 	 * End with BUG
52 	 */
53 	if (likely(skb->dev && skb->dev->nd_net))
54 		return dev_net(skb->dev);
55 	if (skb_dst(skb) && skb_dst(skb)->dev)
56 		return dev_net(skb_dst(skb)->dev);
57 	WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
58 		      __func__, __LINE__);
59 	if (likely(skb->sk && skb->sk->sk_net))
60 		return sock_net(skb->sk);
61 	pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
62 		__func__, __LINE__);
63 	BUG();
64 #else
65 	return dev_net(skb->dev ? : skb_dst(skb)->dev);
66 #endif
67 #else
68 	return &init_net;
69 #endif
70 }
71 
72 static inline struct net *skb_sknet(const struct sk_buff *skb)
73 {
74 #ifdef CONFIG_NET_NS
75 #ifdef CONFIG_IP_VS_DEBUG
76 	/* Start with the most likely hit */
77 	if (likely(skb->sk && skb->sk->sk_net))
78 		return sock_net(skb->sk);
79 	WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
80 		       __func__, __LINE__);
81 	if (likely(skb->dev && skb->dev->nd_net))
82 		return dev_net(skb->dev);
83 	pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
84 		__func__, __LINE__);
85 	BUG();
86 #else
87 	return sock_net(skb->sk);
88 #endif
89 #else
90 	return &init_net;
91 #endif
92 }
93 /*
94  * This one needed for single_open_net since net is stored directly in
95  * private not as a struct i.e. seq_file_net can't be used.
96  */
97 static inline struct net *seq_file_single_net(struct seq_file *seq)
98 {
99 #ifdef CONFIG_NET_NS
100 	return (struct net *)seq->private;
101 #else
102 	return &init_net;
103 #endif
104 }
105 
106 /* Connections' size value needed by ip_vs_ctl.c */
107 extern int ip_vs_conn_tab_size;
108 
/*
 * Condensed, address-family independent view of a network header.
 * Filled in by ip_vs_fill_ip4hdr(), ip_vs_fill_iph_skb() and (addresses
 * only) ip_vs_fill_iph_addr_only().
 */
109 struct ip_vs_iphdr {
110 	__u32 len;	/* IPv4: simply where L4 starts (ihl * 4);
			   IPv6: where the L4 transport header starts */
112 	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag) */
113 	__s16 protocol;	/* IPv4: iph->protocol; IPv6: ipv6_find_hdr() result */
114 	__s32 flags;	/* flags output of ipv6_find_hdr() (IPv6 path) */
115 	union nf_inet_addr saddr;	/* source address */
116 	union nf_inet_addr daddr;	/* destination address */
117 };
118 
119 static inline void
120 ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
121 {
122 	const struct iphdr *iph = nh;
123 
124 	iphdr->len	= iph->ihl * 4;
125 	iphdr->fragoffs	= 0;
126 	iphdr->protocol	= iph->protocol;
127 	iphdr->saddr.ip	= iph->saddr;
128 	iphdr->daddr.ip	= iph->daddr;
129 }
130 
131 /* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
132  * IPv6 requires some extra work, as finding proper header position,
133  * depend on the IPv6 extension headers.
134  */
135 static inline void
136 ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
137 {
138 #ifdef CONFIG_IP_VS_IPV6
139 	if (af == AF_INET6) {
140 		const struct ipv6hdr *iph =
141 			(struct ipv6hdr *)skb_network_header(skb);
142 		iphdr->saddr.in6 = iph->saddr;
143 		iphdr->daddr.in6 = iph->daddr;
144 		/* ipv6_find_hdr() updates len, flags */
145 		iphdr->len	 = 0;
146 		iphdr->flags	 = 0;
147 		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
148 						 &iphdr->fragoffs,
149 						 &iphdr->flags);
150 	} else
151 #endif
152 	{
153 		const struct iphdr *iph =
154 			(struct iphdr *)skb_network_header(skb);
155 		iphdr->len	= iph->ihl * 4;
156 		iphdr->fragoffs	= 0;
157 		iphdr->protocol	= iph->protocol;
158 		iphdr->saddr.ip	= iph->saddr;
159 		iphdr->daddr.ip	= iph->daddr;
160 	}
161 }
162 
163 /* This function is a faster version of ip_vs_fill_iph_skb().
164  * Where we only populate {s,d}addr (and avoid calling ipv6_find_hdr()).
165  * This is used by the some of the ip_vs_*_schedule() functions.
166  * (Mostly done to avoid ABI breakage of external schedulers)
167  */
168 static inline void
169 ip_vs_fill_iph_addr_only(int af, const struct sk_buff *skb,
170 			 struct ip_vs_iphdr *iphdr)
171 {
172 #ifdef CONFIG_IP_VS_IPV6
173 	if (af == AF_INET6) {
174 		const struct ipv6hdr *iph =
175 			(struct ipv6hdr *)skb_network_header(skb);
176 		iphdr->saddr.in6 = iph->saddr;
177 		iphdr->daddr.in6 = iph->daddr;
178 	} else {
179 #endif
180 		const struct iphdr *iph =
181 			(struct iphdr *)skb_network_header(skb);
182 		iphdr->saddr.ip = iph->saddr;
183 		iphdr->daddr.ip = iph->daddr;
184 	}
185 }
186 
187 static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
188 				   const union nf_inet_addr *src)
189 {
190 #ifdef CONFIG_IP_VS_IPV6
191 	if (af == AF_INET6)
192 		dst->in6 = src->in6;
193 	else
194 #endif
195 	dst->ip = src->ip;
196 }
197 
198 static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
199 				   const union nf_inet_addr *b)
200 {
201 #ifdef CONFIG_IP_VS_IPV6
202 	if (af == AF_INET6)
203 		return ipv6_addr_equal(&a->in6, &b->in6);
204 #endif
205 	return a->ip == b->ip;
206 }
207 
208 #ifdef CONFIG_IP_VS_DEBUG
209 #include <linux/net.h>
210 
211 extern int ip_vs_get_debug_level(void);
212 
213 static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
214 					 const union nf_inet_addr *addr,
215 					 int *idx)
216 {
217 	int len;
218 #ifdef CONFIG_IP_VS_IPV6
219 	if (af == AF_INET6)
220 		len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]",
221 			       &addr->in6) + 1;
222 	else
223 #endif
224 		len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
225 			       &addr->ip) + 1;
226 
227 	*idx += len;
228 	BUG_ON(*idx > buf_len + 1);
229 	return &buf[*idx - len];
230 }
231 
/*
 * Debug logging helpers (CONFIG_IP_VS_DEBUG builds).
 *
 * The level-based macros emit their message only when @level is
 * <= ip_vs_get_debug_level().  Macro arguments that are used inside an
 * expression (@level, @pp) are parenthesized so that callers may pass
 * compound expressions such as "verbose ? 9 : 1".
 *
 * The *_BUF variants additionally provide a 160 byte scratch buffer and
 * an index for IP_VS_DBG_ADDR().
 */
#define IP_VS_DBG_BUF(level, msg, ...)					\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
#define IP_VS_ERR_BUF(msg...)						\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		pr_err(msg);						\
	} while (0)

/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
#define IP_VS_DBG_ADDR(af, addr)					\
	ip_vs_dbg_addr(af, ip_vs_dbg_buf,				\
		       sizeof(ip_vs_dbg_buf), addr,			\
		       &ip_vs_dbg_idx)

/* Print at KERN_DEBUG when @level is enabled. */
#define IP_VS_DBG(level, msg, ...)					\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Print at KERN_DEBUG, limited by net_ratelimit(); no level check. */
#define IP_VS_DBG_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Hand the packet to the protocol's debug_packet() hook when enabled. */
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
/* As IP_VS_DBG_PKT(), additionally limited by net_ratelimit(). */
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level() &&		\
		    net_ratelimit())					\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
273 #else	/* NO DEBUGGING at ALL */
/*
 * Debugging compiled out: every debug macro expands to an empty
 * statement, so none of its arguments is evaluated.
 */
274 #define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
275 #define IP_VS_ERR_BUF(msg...)  do {} while (0)
276 #define IP_VS_DBG(level, msg...)  do {} while (0)
277 #define IP_VS_DBG_RL(msg...)  do {} while (0)
278 #define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
279 #define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
280 #endif
281 
/* Plain wrapper around BUG(). */
282 #define IP_VS_BUG() BUG()
/* Error message via pr_err(), emitted only when net_ratelimit() allows it. */
283 #define IP_VS_ERR_RL(msg, ...)						\
284 	do {								\
285 		if (net_ratelimit())					\
286 			pr_err(msg, ##__VA_ARGS__);			\
287 	} while (0)
288 
289 #ifdef CONFIG_IP_VS_DEBUG
/*
 * Function entry/exit tracing for debug builds.  Each macro prints the
 * function name, file and line at KERN_DEBUG when @level is
 * <= ip_vs_get_debug_level().  @level is parenthesized so that compound
 * expressions can be passed safely.
 */
#define EnterFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Enter: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#define LeaveFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Leave: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
304 #else
/* Tracing compiled out: both markers are empty statements. */
305 #define EnterFunction(level)   do {} while (0)
306 #define LeaveFunction(level)   do {} while (0)
307 #endif
308 
/*
 * Busy-wait while @expr is true, calling cpu_relax() on every iteration.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and can be used as the body of an unbraced if/else.  Callers
 * terminate it with a semicolon like any other statement.
 */
#define	IP_VS_WAIT_WHILE(expr)						\
	do {								\
		while (expr)						\
			cpu_relax();					\
	} while (0)
310 
311 
312 /*
 *      The FTP port numbers, converted with cpu_to_be16() (network order):
 *      FTPPORT is port 21, FTPDATA is port 20.
 */
315 #define FTPPORT  cpu_to_be16(21)
316 #define FTPDATA  cpu_to_be16(20)
317 
318 /*
 *      TCP State Values
 *
 *      IP_VS_TCP_S_LAST is not a state of its own: it is one past the
 *      last state and therefore equals the number of states.
 */
321 enum {
322 	IP_VS_TCP_S_NONE = 0,
323 	IP_VS_TCP_S_ESTABLISHED,
324 	IP_VS_TCP_S_SYN_SENT,
325 	IP_VS_TCP_S_SYN_RECV,
326 	IP_VS_TCP_S_FIN_WAIT,
327 	IP_VS_TCP_S_TIME_WAIT,
328 	IP_VS_TCP_S_CLOSE,
329 	IP_VS_TCP_S_CLOSE_WAIT,
330 	IP_VS_TCP_S_LAST_ACK,
331 	IP_VS_TCP_S_LISTEN,
332 	IP_VS_TCP_S_SYNACK,
333 	IP_VS_TCP_S_LAST
334 };
335 
336 /*
 *	UDP State Values
 *
 *	A single state; IP_VS_UDP_S_LAST is the number of states.
 */
339 enum {
340 	IP_VS_UDP_S_NORMAL,
341 	IP_VS_UDP_S_LAST,
342 };
343 
344 /*
 *	ICMP State Values
 *
 *	A single state; IP_VS_ICMP_S_LAST is the number of states.
 */
347 enum {
348 	IP_VS_ICMP_S_NORMAL,
349 	IP_VS_ICMP_S_LAST,
350 };
351 
352 /*
 *	SCTP State Values
 *
 *	IP_VS_SCTP_S_LAST is one past the last state, i.e. the number of
 *	states.  NOTE(review): the _CLI / _SER suffixes presumably tell
 *	which side (client / server) sent the chunk -- confirm against the
 *	SCTP protocol module.
 */
355 enum ip_vs_sctp_states {
356 	IP_VS_SCTP_S_NONE,
357 	IP_VS_SCTP_S_INIT_CLI,
358 	IP_VS_SCTP_S_INIT_SER,
359 	IP_VS_SCTP_S_INIT_ACK_CLI,
360 	IP_VS_SCTP_S_INIT_ACK_SER,
361 	IP_VS_SCTP_S_ECHO_CLI,
362 	IP_VS_SCTP_S_ECHO_SER,
363 	IP_VS_SCTP_S_ESTABLISHED,
364 	IP_VS_SCTP_S_SHUT_CLI,
365 	IP_VS_SCTP_S_SHUT_SER,
366 	IP_VS_SCTP_S_SHUT_ACK_CLI,
367 	IP_VS_SCTP_S_SHUT_ACK_SER,
368 	IP_VS_SCTP_S_CLOSED,
369 	IP_VS_SCTP_S_LAST
370 };
371 
372 /*
 *	Delta sequence info structure
 *	Each ip_vs_conn has 2 of them: in_seq for the incoming and out_seq
 *	for the outgoing sequence number changes.
 *	Only used in the VS/NAT.
 */
377 struct ip_vs_seq {
378 	__u32			init_seq;	/* Add delta from this seq */
379 	__u32			delta;		/* Delta in sequence numbers */
380 	__u32			previous_delta;	/* Delta in sequence numbers
						   before last resized pkt */
382 };
383 
384 /*
 * Counters kept per cpu (embedded in struct ip_vs_cpu_stats).
 */
387 struct ip_vs_counters {
388 	__u32		conns;		/* connections scheduled */
389 	__u32		inpkts;		/* incoming packets */
390 	__u32		outpkts;	/* outgoing packets */
391 	__u64		inbytes;	/* incoming bytes */
392 	__u64		outbytes;	/* outgoing bytes */
393 };
394 /*
 * Stats per cpu: the counters plus a u64_stats_sync object.
 * NOTE(review): syncp presumably guards the 64-bit byte counters for
 * readers on 32-bit systems -- confirm against the users of cpustats.
 */
397 struct ip_vs_cpu_stats {
398 	struct ip_vs_counters   ustats;
399 	struct u64_stats_sync   syncp;
400 };
401 
402 /*
 *	IPVS statistics objects
 *
 *	struct ip_vs_estimator keeps the previously sampled counter values
 *	(last_*) next to the derived rates.  NOTE(review): the rate members
 *	look like connections/s, packets/s and bytes/s -- confirm the units
 *	against the estimator code.
 */
405 struct ip_vs_estimator {
406 	struct list_head	list;
407 
408 	u64			last_inbytes;
409 	u64			last_outbytes;
410 	u32			last_conns;
411 	u32			last_inpkts;
412 	u32			last_outpkts;
413 
414 	u32			cps;
415 	u32			inpps;
416 	u32			outpps;
417 	u32			inbps;
418 	u32			outbps;
419 };
420 
/*
 * Statistics object embedded in services, destinations and the
 * per-namespace totals (see the "stats" / "tot_stats" members of
 * struct ip_vs_service, struct ip_vs_dest and struct netns_ipvs).
 */
421 struct ip_vs_stats {
422 	struct ip_vs_stats_user	ustats;		/* statistics */
423 	struct ip_vs_estimator	est;		/* estimator */
424 	struct ip_vs_cpu_stats	*cpustats;	/* per cpu counters */
425 	spinlock_t		lock;		/* spin lock */
426 	struct ip_vs_stats_user	ustats0;	/* reset values */
427 };
428 
429 struct dst_entry;
430 struct iphdr;
431 struct ip_vs_conn;
432 struct ip_vs_app;
433 struct sk_buff;
434 struct ip_vs_proto_data;
435 
/*
 * Description of one transport protocol handled by IPVS: identification
 * plus the set of hooks the core calls for it.  Looked up with
 * ip_vs_proto_get(); the per-namespace part lives in
 * struct ip_vs_proto_data.
 */
436 struct ip_vs_protocol {
437 	struct ip_vs_protocol	*next;		/* next protocol entry */
438 	char			*name;
439 	u16			protocol;
440 	u16			num_states;
441 	int			dont_defrag;
442 
	/* global setup / teardown of the protocol */
443 	void (*init)(struct ip_vs_protocol *pp);
444 
445 	void (*exit)(struct ip_vs_protocol *pp);
446 
	/* per network namespace setup / teardown */
447 	int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
448 
449 	void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
450 
	/* schedule a new connection; result through *verdict and *cpp */
451 	int (*conn_schedule)(int af, struct sk_buff *skb,
452 			     struct ip_vs_proto_data *pd,
453 			     int *verdict, struct ip_vs_conn **cpp);
454 
	/* connection lookup for the in and out direction */
455 	struct ip_vs_conn *
456 	(*conn_in_get)(int af,
457 		       const struct sk_buff *skb,
458 		       const struct ip_vs_iphdr *iph,
459 		       unsigned int proto_off,
460 		       int inverse);
461 
462 	struct ip_vs_conn *
463 	(*conn_out_get)(int af,
464 			const struct sk_buff *skb,
465 			const struct ip_vs_iphdr *iph,
466 			unsigned int proto_off,
467 			int inverse);
468 
	/* source / destination NAT handlers */
469 	int (*snat_handler)(struct sk_buff *skb,
470 			    struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
471 
472 	int (*dnat_handler)(struct sk_buff *skb,
473 			    struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
474 
475 	int (*csum_check)(int af, struct sk_buff *skb,
476 			  struct ip_vs_protocol *pp);
477 
478 	const char *(*state_name)(int state);
479 
480 	void (*state_transition)(struct ip_vs_conn *cp, int direction,
481 				 const struct sk_buff *skb,
482 				 struct ip_vs_proto_data *pd);
483 
	/* application helper (struct ip_vs_app) registration and binding */
484 	int (*register_app)(struct net *net, struct ip_vs_app *inc);
485 
486 	void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
487 
488 	int (*app_conn_bind)(struct ip_vs_conn *cp);
489 
	/* called by IP_VS_DBG_PKT() / IP_VS_DBG_RL_PKT() */
490 	void (*debug_packet)(int af, struct ip_vs_protocol *pp,
491 			     const struct sk_buff *skb,
492 			     int offset,
493 			     const char *msg);
494 
495 	void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
496 };
497 
498 /*
 * Protocol data per netns: ties a struct ip_vs_protocol to the state that
 * is kept separately for every network namespace.  Looked up with
 * ip_vs_proto_data_get().
 */
501 struct ip_vs_proto_data {
502 	struct ip_vs_proto_data	*next;		/* next entry */
503 	struct ip_vs_protocol	*pp;		/* protocol this data is for */
504 	int			*timeout_table;	/* protocol timeout table */
505 	atomic_t		appcnt;		/* counter of proto app incs. */
506 	struct tcp_states_t	*tcp_state_table;
507 };
508 
509 extern struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
510 extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
511 						     unsigned short proto);
512 
/*
 * Parameter bundle describing a connection: namespace, client and
 * virtual address/port, protocol and address family, plus optional
 * persistence engine data.  Passed to the struct ip_vs_pe callbacks.
 */
513 struct ip_vs_conn_param {
514 	struct net			*net;
515 	const union nf_inet_addr	*caddr;		/* client address */
516 	const union nf_inet_addr	*vaddr;		/* virtual address */
517 	__be16				cport;
518 	__be16				vport;
519 	__u16				protocol;
520 	u16				af;
521 
522 	const struct ip_vs_pe		*pe;
523 	char				*pe_data;
524 	__u8				pe_data_len;
525 };
526 
527 /*
 *	IP_VS structure allocated for each dynamically scheduled connection
 */
530 struct ip_vs_conn {
531 	struct hlist_node	c_list;         /* hashed list heads */
532 #ifdef CONFIG_NET_NS
533 	struct net              *net;           /* Name space */
534 #endif
535 	/* Protocol, addresses and port numbers */
536 	u16                     af;             /* address family */
537 	__be16                  cport;
538 	__be16                  vport;
539 	__be16                  dport;
540 	__u32                   fwmark;         /* Firewall mark from skb */
541 	union nf_inet_addr      caddr;          /* client address */
542 	union nf_inet_addr      vaddr;          /* virtual address */
543 	union nf_inet_addr      daddr;          /* destination address */
544 	volatile __u32          flags;          /* status flags */
545 	__u16                   protocol;       /* Which protocol (TCP/UDP) */
546 
547 	/* counter and timer */
548 	atomic_t		refcnt;		/* reference count */
549 	struct timer_list	timer;		/* Expiration timer */
550 	volatile unsigned long	timeout;	/* timeout */
551 
552 	/* Flags and state transition */
553 	spinlock_t              lock;           /* lock for state transition */
554 	volatile __u16          state;          /* state info */
555 	volatile __u16          old_state;      /* old state, to be used for
						 * state transition triggered
						 * synchronization
						 */
559 	unsigned long		sync_endtime;	/* jiffies + sent_retries */
560 
561 	/* Control members */
562 	struct ip_vs_conn       *control;       /* Master control connection */
563 	atomic_t                n_control;      /* Number of controlled ones */
564 	struct ip_vs_dest       *dest;          /* real server */
565 	atomic_t                in_pkts;        /* incoming packet counter */
566 
567 	/* packet transmitter for different forwarding methods.  If it
	   mangles the packet, it must return NF_DROP or better NF_STOLEN,
	   otherwise this must be changed to a sk_buff **.
	   NF_ACCEPT can be returned when destination is local.
	 */
572 	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
573 			   struct ip_vs_protocol *pp);
574 
575 	/* Note: we can group the following members into a structure,
	   in order to save more space, and the following members are
	   only used in VS/NAT anyway */
578 	struct ip_vs_app        *app;           /* bound ip_vs_app object */
579 	void                    *app_data;      /* Application private data */
580 	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
581 	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
582 
	/* persistence engine and its private data */
583 	const struct ip_vs_pe	*pe;
584 	char			*pe_data;
585 	__u8			pe_data_len;
586 };
587 
588 /*
589  *  To save some memory in conn table when name space is disabled.
590  */
591 static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
592 {
593 #ifdef CONFIG_NET_NS
594 	return cp->net;
595 #else
596 	return &init_net;
597 #endif
598 }
/*
 * Record @net as the namespace of connection @cp.  Compiles to nothing
 * without CONFIG_NET_NS, where the connection has no net member.
 */
static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
{
#if defined(CONFIG_NET_NS)
	struct ip_vs_conn *conn = cp;

	conn->net = net;
#endif
}
605 
/*
 * Return non-zero when connection @cp belongs to namespace @net.
 * Always 1 without CONFIG_NET_NS.
 */
static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
				    struct net *net)
{
#ifndef CONFIG_NET_NS
	return 1;
#else
	return net == cp->net;
#endif
}
615 
616 /*
 *	Extended internal versions of struct ip_vs_service_user and
 *	ip_vs_dest_user for IPv6 support.
 *
 *	We need these to conveniently pass around service and destination
 *	options, but unfortunately, we also need to keep the old definitions to
 *	maintain userspace backwards compatibility for the setsockopt interface.
 */
624 struct ip_vs_service_user_kern {
625 	/* virtual service addresses */
626 	u16			af;
627 	u16			protocol;
628 	union nf_inet_addr	addr;		/* virtual ip address */
629 	u16			port;
630 	u32			fwmark;		/* firewall mark of service */
631 
632 	/* virtual service options */
633 	char			*sched_name;
634 	char			*pe_name;
635 	unsigned int		flags;		/* virtual service flags */
636 	unsigned int		timeout;	/* persistent timeout in sec */
637 	u32			netmask;	/* persistent netmask */
638 };
639 
640 
/*
 * Kernel-internal description of a destination (real server) and its
 * options; the union nf_inet_addr member can hold IPv4 or IPv6.
 */
641 struct ip_vs_dest_user_kern {
642 	/* destination server address */
643 	union nf_inet_addr	addr;
644 	u16			port;
645 
646 	/* real server options */
647 	unsigned int		conn_flags;	/* connection flags */
648 	int			weight;		/* destination weight */
649 
650 	/* thresholds for active connections */
651 	u32			u_threshold;	/* upper threshold */
652 	u32			l_threshold;	/* lower threshold */
653 };
654 
655 
656 /*
 *	The information about the virtual service offered to the net
 *	and the forwarding entries
 */
660 struct ip_vs_service {
661 	struct list_head	s_list;   /* for normal service table */
662 	struct list_head	f_list;   /* for fwmark-based service table */
663 	atomic_t		refcnt;   /* reference counter */
664 	atomic_t		usecnt;   /* use counter */
665 
666 	u16			af;       /* address family */
667 	__u16			protocol; /* which protocol (TCP/UDP) */
668 	union nf_inet_addr	addr;	  /* IP address for virtual service */
669 	__be16			port;	  /* port number for the service */
670 	__u32                   fwmark;   /* firewall mark of the service */
671 	unsigned int		flags;	  /* service status flags */
672 	unsigned int		timeout;  /* persistent timeout in ticks */
673 	__be32			netmask;  /* grouping granularity */
674 	struct net		*net;	  /* network namespace */
675 
676 	struct list_head	destinations;  /* real server d-linked list */
677 	__u32			num_dests;     /* number of servers */
678 	struct ip_vs_stats      stats;         /* statistics for the service */
679 	struct ip_vs_app	*inc;	  /* bind conns to this app inc */
680 
681 	/* for scheduling */
682 	struct ip_vs_scheduler	*scheduler;    /* bound scheduler object */
683 	rwlock_t		sched_lock;    /* lock sched_data */
684 	void			*sched_data;   /* scheduler application data */
685 
686 	/* alternate persistence engine */
687 	struct ip_vs_pe		*pe;
688 };
689 
690 
691 /*
 *	The real server destination forwarding entry
 *	with ip address, port number, and so on.
 */
695 struct ip_vs_dest {
696 	struct list_head	n_list;   /* for the dests in the service */
697 	struct list_head	d_list;   /* for table with all the dests */
698 
699 	u16			af;		/* address family */
700 	__be16			port;		/* port number of the server */
701 	union nf_inet_addr	addr;		/* IP address of the server */
702 	volatile unsigned int	flags;		/* dest status flags */
703 	atomic_t		conn_flags;	/* flags to copy to conn */
704 	atomic_t		weight;		/* server weight */
705 
706 	atomic_t		refcnt;		/* reference counter */
707 	struct ip_vs_stats      stats;          /* statistics */
708 
709 	/* connection counters and thresholds */
710 	atomic_t		activeconns;	/* active connections */
711 	atomic_t		inactconns;	/* inactive connections */
712 	atomic_t		persistconns;	/* persistent connections */
713 	__u32			u_threshold;	/* upper threshold */
714 	__u32			l_threshold;	/* lower threshold */
715 
716 	/* for destination cache */
717 	spinlock_t		dst_lock;	/* lock of dst_cache */
718 	struct dst_entry	*dst_cache;	/* destination cache entry */
719 	u32			dst_rtos;	/* RT_TOS(tos) for dst */
720 	u32			dst_cookie;
721 	union nf_inet_addr	dst_saddr;
722 
723 	/* for virtual service */
724 	struct ip_vs_service	*svc;		/* service it belongs to */
725 	__u16			protocol;	/* which protocol (TCP/UDP) */
726 	__be16			vport;		/* virtual port number */
727 	union nf_inet_addr	vaddr;		/* virtual IP address */
728 	__u32			vfwmark;	/* firewall mark of service */
729 };
730 
731 
732 /*
 *	The scheduler object: name, ownership information and the hooks
 *	used to pick a destination for a service.
 */
735 struct ip_vs_scheduler {
736 	struct list_head	n_list;		/* d-linked list head */
737 	char			*name;		/* scheduler name */
738 	atomic_t		refcnt;		/* reference counter */
739 	struct module		*module;	/* THIS_MODULE/NULL */
740 
741 	/* scheduler initializing service */
742 	int (*init_service)(struct ip_vs_service *svc);
743 	/* scheduling service finish */
744 	int (*done_service)(struct ip_vs_service *svc);
745 	/* scheduler updating service */
746 	int (*update_service)(struct ip_vs_service *svc);
747 
748 	/* selecting a server from the given service */
749 	struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
750 				       const struct sk_buff *skb);
751 };
752 
753 /* The persistence engine object */
754 struct ip_vs_pe {
755 	struct list_head	n_list;		/* d-linked list head */
756 	char			*name;		/* persistence engine name */
757 	atomic_t		refcnt;		/* reference counter */
758 	struct module		*module;	/* THIS_MODULE/NULL */
759 
760 	/* get the connection template, if any */
761 	int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
	/* does connection template ct match the parameters in p? */
762 	bool (*ct_match)(const struct ip_vs_conn_param *p,
763 			 struct ip_vs_conn *ct);
	/* hash key computed from the parameters in p */
764 	u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
765 			   bool inverse);
	/* write the engine's data for cp into buf */
766 	int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
767 };
768 
769 /*
 *	The application module object (a.k.a. app incarnation)
 *
 *	The same structure describes an application (which owns a list of
 *	incarnations, incs_list) and an incarnation (which points back to
 *	its application through ->app).
 */
772 struct ip_vs_app {
773 	struct list_head	a_list;		/* member in app list */
774 	int			type;		/* IP_VS_APP_TYPE_xxx */
775 	char			*name;		/* application module name */
776 	__u16			protocol;
777 	struct module		*module;	/* THIS_MODULE/NULL */
778 	struct list_head	incs_list;	/* list of incarnations */
779 
780 	/* members for application incarnations */
781 	struct list_head	p_list;		/* member in proto app list */
782 	struct ip_vs_app	*app;		/* its real application */
783 	__be16			port;		/* port number in net order */
784 	atomic_t		usecnt;		/* usage counter */
785 
786 	/*
	 * output hook: Process packet in inout direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
791 	int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
792 		       struct sk_buff *, int *diff);
793 
794 	/*
	 * input hook: Process packet in outin direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
799 	int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
800 		      struct sk_buff *, int *diff);
801 
802 	/* ip_vs_app initializer */
803 	int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
804 
805 	/* ip_vs_app finish */
806 	int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);
807 
808 
809 	/* not used now */
810 	int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *,
811 			 struct ip_vs_protocol *);
812 
813 	void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *);
814 
815 	int *			timeout_table;
816 	int *			timeouts;
817 	int			timeouts_size;
818 
819 	int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
820 			     int *verdict, struct ip_vs_conn **cpp);
821 
822 	struct ip_vs_conn *
823 	(*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
824 		       const struct iphdr *iph, unsigned int proto_off,
825 		       int inverse);
826 
827 	struct ip_vs_conn *
828 	(*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
829 			const struct iphdr *iph, unsigned int proto_off,
830 			int inverse);
831 
832 	int (*state_transition)(struct ip_vs_conn *cp, int direction,
833 				const struct sk_buff *skb,
834 				struct ip_vs_app *app);
835 
836 	void (*timeout_change)(struct ip_vs_app *app, int flags);
837 };
838 
/*
 * State of a master sync instance; struct netns_ipvs points to it through
 * its "ms" member and is pointed back to through ->ipvs.
 */
839 struct ipvs_master_sync_state {
840 	struct list_head	sync_queue;	/* queue of sync buffers */
841 	struct ip_vs_sync_buff	*sync_buff;	/* current sync buffer */
842 	int			sync_queue_len;
843 	unsigned int		sync_queue_delay;
844 	struct task_struct	*master_thread;
845 	struct delayed_work	master_wakeup_work;
846 	struct netns_ipvs	*ipvs;		/* owning IPVS namespace state */
847 };
848 
849 /* IPVS in network namespace: all IPVS state kept per struct net
 * (reached through net_ipvs()).  The members are grouped by the source
 * file that uses them, as marked by the comments below.
 */
850 struct netns_ipvs {
851 	int			gen;		/* Generation */
852 	int			enable;		/* enable like nf_hooks do */
853 	/*
	 *	Hash table: for real service lookups
	 */
856 	#define IP_VS_RTAB_BITS 4
857 	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
858 	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
859 
860 	struct list_head	rs_table[IP_VS_RTAB_SIZE];
861 	/* ip_vs_app */
862 	struct list_head	app_list;
863 	/* ip_vs_proto */
864 	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
865 	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
866 	/* ip_vs_proto_tcp */
867 #ifdef CONFIG_IP_VS_PROTO_TCP
868 	#define	TCP_APP_TAB_BITS	4
869 	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
870 	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
871 	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
872 	spinlock_t		tcp_app_lock;
873 #endif
874 	/* ip_vs_proto_udp */
875 #ifdef CONFIG_IP_VS_PROTO_UDP
876 	#define	UDP_APP_TAB_BITS	4
877 	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
878 	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
879 	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
880 	spinlock_t		udp_app_lock;
881 #endif
882 	/* ip_vs_proto_sctp */
883 #ifdef CONFIG_IP_VS_PROTO_SCTP
884 	#define SCTP_APP_TAB_BITS	4
885 	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
886 	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
887 	/* Hash table for SCTP application incarnations	 */
888 	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
889 	spinlock_t		sctp_app_lock;
890 #endif
891 	/* ip_vs_conn */
892 	atomic_t		conn_count;      /*  connection counter */
893 
894 	/* ip_vs_ctl */
895 	struct ip_vs_stats		tot_stats;  /* Statistics & est. */
896 
897 	int			num_services;    /* number of virtual services */
898 
899 	rwlock_t		rs_lock;         /* real services table */
900 	/* Trash for destinations */
901 	struct list_head	dest_trash;
902 	/* Service counters */
903 	atomic_t		ftpsvc_counter;
904 	atomic_t		nullsvc_counter;
905 
906 #ifdef CONFIG_SYSCTL
907 	/* 1/rate drop and drop-entry variables */
908 	struct delayed_work	defense_work;   /* Work handler */
909 	int			drop_rate;
910 	int			drop_counter;
911 	atomic_t		dropentry;
912 	/* locks in ctl.c */
913 	spinlock_t		dropentry_lock;  /* drop entry handling */
914 	spinlock_t		droppacket_lock; /* drop packet handling */
915 	spinlock_t		securetcp_lock;  /* state and timeout tables */
916 
917 	/* sys-ctl struct */
918 	struct ctl_table_header	*sysctl_hdr;
919 	struct ctl_table	*sysctl_tbl;
920 #endif
921 
922 	/* sysctl variables (several are read through the sysctl_*() helpers) */
923 	int			sysctl_amemthresh;
924 	int			sysctl_am_droprate;
925 	int			sysctl_drop_entry;
926 	int			sysctl_drop_packet;
927 	int			sysctl_secure_tcp;
928 #ifdef CONFIG_IP_VS_NFCT
929 	int			sysctl_conntrack;
930 #endif
931 	int			sysctl_snat_reroute;
932 	int			sysctl_sync_ver;
933 	int			sysctl_sync_ports;
934 	int			sysctl_sync_qlen_max;
935 	int			sysctl_sync_sock_size;
936 	int			sysctl_cache_bypass;
937 	int			sysctl_expire_nodest_conn;
938 	int			sysctl_expire_quiescent_template;
939 	int			sysctl_sync_threshold[2]; /* [0] threshold, [1] period */
940 	unsigned int		sysctl_sync_refresh_period;
941 	int			sysctl_sync_retries;
942 	int			sysctl_nat_icmp_send;
943 	int			sysctl_pmtu_disc;
944 
945 	/* ip_vs_lblc */
946 	int			sysctl_lblc_expiration;
947 	struct ctl_table_header	*lblc_ctl_header;
948 	struct ctl_table	*lblc_ctl_table;
949 	/* ip_vs_lblcr */
950 	int			sysctl_lblcr_expiration;
951 	struct ctl_table_header	*lblcr_ctl_header;
952 	struct ctl_table	*lblcr_ctl_table;
953 	/* ip_vs_est */
954 	struct list_head	est_list;	/* estimator list */
955 	spinlock_t		est_lock;
956 	struct timer_list	est_timer;	/* Estimation timer */
957 	/* ip_vs_sync */
958 	spinlock_t		sync_lock;
959 	struct ipvs_master_sync_state *ms;
960 	spinlock_t		sync_buff_lock;
961 	struct task_struct	**backup_threads;
962 	int			threads_mask;
963 	int			send_mesg_maxlen;
964 	int			recv_mesg_maxlen;
965 	volatile int		sync_state;
966 	volatile int		master_syncid;
967 	volatile int		backup_syncid;
968 	struct mutex		sync_mutex;
969 	/* multicast interface name */
970 	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
971 	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
972 	/* net name space ptr */
973 	struct net		*net;            /* Needed by timer routines */
974 };
975 
/*
 * Defaults and limits for connection synchronization.  The DEFAULT_*
 * values are what the sysctl_sync_*() helpers return when CONFIG_SYSCTL
 * is disabled.  The HZ based values are expressed in jiffies.
 */
976 #define DEFAULT_SYNC_THRESHOLD	3
977 #define DEFAULT_SYNC_PERIOD	50
978 #define DEFAULT_SYNC_VER	1
979 #define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
980 #define DEFAULT_SYNC_RETRIES		0
981 #define IPVS_SYNC_WAKEUP_RATE	8
982 #define IPVS_SYNC_QLEN_MAX	(IPVS_SYNC_WAKEUP_RATE * 4)
983 #define IPVS_SYNC_SEND_DELAY	(HZ / 50)
984 #define IPVS_SYNC_CHECK_PERIOD	HZ
985 #define IPVS_SYNC_FLUSH_TIME	(HZ * 2)
986 #define IPVS_SYNC_PORTS_MAX	(1 << 6)
987 
988 #ifdef CONFIG_SYSCTL
989 
990 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
991 {
992 	return ipvs->sysctl_sync_threshold[0];
993 }
994 
995 static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
996 {
997 	return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
998 }
999 
1000 static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1001 {
1002 	return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
1003 }
1004 
1005 static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1006 {
1007 	return ipvs->sysctl_sync_retries;
1008 }
1009 
1010 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1011 {
1012 	return ipvs->sysctl_sync_ver;
1013 }
1014 
1015 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1016 {
1017 	return ACCESS_ONCE(ipvs->sysctl_sync_ports);
1018 }
1019 
1020 static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1021 {
1022 	return ipvs->sysctl_sync_qlen_max;
1023 }
1024 
1025 static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1026 {
1027 	return ipvs->sysctl_sync_sock_size;
1028 }
1029 
1030 static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1031 {
1032 	return ipvs->sysctl_pmtu_disc;
1033 }
1034 
1035 #else
1036 
1037 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1038 {
1039 	return DEFAULT_SYNC_THRESHOLD;
1040 }
1041 
1042 static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1043 {
1044 	return DEFAULT_SYNC_PERIOD;
1045 }
1046 
1047 static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1048 {
1049 	return DEFAULT_SYNC_REFRESH_PERIOD;
1050 }
1051 
1052 static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1053 {
1054 	return DEFAULT_SYNC_RETRIES & 3;
1055 }
1056 
1057 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1058 {
1059 	return DEFAULT_SYNC_VER;
1060 }
1061 
1062 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1063 {
1064 	return 1;
1065 }
1066 
1067 static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1068 {
1069 	return IPVS_SYNC_QLEN_MAX;
1070 }
1071 
1072 static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1073 {
1074 	return 0;
1075 }
1076 
1077 static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1078 {
1079 	return 1;
1080 }
1081 
1082 #endif
1083 
1084 /*
1085  *      IPVS core functions
1086  *      (from ip_vs_core.c)
1087  */
const char *ip_vs_proto_name(unsigned int proto);
void ip_vs_init_hash_table(struct list_head *table, int rows);

/*
 * Call ip_vs_init_hash_table() on array @t, passing its element count.
 * ARRAY_SIZE() needs a real array here, not a pointer.
 */
#define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))

#define IP_VS_APP_TYPE_FTP	1
1093 
1094 /*
1095  *     ip_vs_conn handling functions
1096  *     (from ip_vs_conn.c)
1097  */
1098 
/*
 * IP_VS_DIR_* values, numbered consecutively from 0.
 * IP_VS_DIR_LAST is one past the highest real value.
 */
enum {
	IP_VS_DIR_INPUT		= 0,
	IP_VS_DIR_OUTPUT	= 1,
	IP_VS_DIR_INPUT_ONLY	= 2,
	IP_VS_DIR_LAST		= 3,
};
1105 
1106 static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
1107 					 const union nf_inet_addr *caddr,
1108 					 __be16 cport,
1109 					 const union nf_inet_addr *vaddr,
1110 					 __be16 vport,
1111 					 struct ip_vs_conn_param *p)
1112 {
1113 	p->net = net;
1114 	p->af = af;
1115 	p->protocol = protocol;
1116 	p->caddr = caddr;
1117 	p->cport = cport;
1118 	p->vaddr = vaddr;
1119 	p->vport = vport;
1120 	p->pe = NULL;
1121 	p->pe_data = NULL;
1122 }
1123 
/* lookups keyed by a filled-in struct ip_vs_conn_param */
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);

/* lookups that take the skb and parsed IP header instead */
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph,
			unsigned int proto_off, int inverse);

struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
			 const struct ip_vs_iphdr *iph,
			 unsigned int proto_off, int inverse);
1138 
1139 /* put back the conn without restarting its timer */
1140 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
1141 {
1142 	atomic_dec(&cp->refcnt);
1143 }
1144 extern void ip_vs_conn_put(struct ip_vs_conn *cp);
1145 extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
1146 
1147 struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
1148 				  const union nf_inet_addr *daddr,
1149 				  __be16 dport, unsigned int flags,
1150 				  struct ip_vs_dest *dest, __u32 fwmark);
1151 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
1152 
1153 extern const char * ip_vs_state_name(__u16 proto, int state);
1154 
1155 extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
1156 extern int ip_vs_check_template(struct ip_vs_conn *ct);
1157 extern void ip_vs_random_dropentry(struct net *net);
1158 extern int ip_vs_conn_init(void);
1159 extern void ip_vs_conn_cleanup(void);
1160 
1161 static inline void ip_vs_control_del(struct ip_vs_conn *cp)
1162 {
1163 	struct ip_vs_conn *ctl_cp = cp->control;
1164 	if (!ctl_cp) {
1165 		IP_VS_ERR_BUF("request control DEL for uncontrolled: "
1166 			      "%s:%d to %s:%d\n",
1167 			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1168 			      ntohs(cp->cport),
1169 			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1170 			      ntohs(cp->vport));
1171 
1172 		return;
1173 	}
1174 
1175 	IP_VS_DBG_BUF(7, "DELeting control for: "
1176 		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1177 		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1178 		      ntohs(cp->cport),
1179 		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1180 		      ntohs(ctl_cp->cport));
1181 
1182 	cp->control = NULL;
1183 	if (atomic_read(&ctl_cp->n_control) == 0) {
1184 		IP_VS_ERR_BUF("BUG control DEL with n=0 : "
1185 			      "%s:%d to %s:%d\n",
1186 			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1187 			      ntohs(cp->cport),
1188 			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1189 			      ntohs(cp->vport));
1190 
1191 		return;
1192 	}
1193 	atomic_dec(&ctl_cp->n_control);
1194 }
1195 
1196 static inline void
1197 ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
1198 {
1199 	if (cp->control) {
1200 		IP_VS_ERR_BUF("request control ADD for already controlled: "
1201 			      "%s:%d to %s:%d\n",
1202 			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1203 			      ntohs(cp->cport),
1204 			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1205 			      ntohs(cp->vport));
1206 
1207 		ip_vs_control_del(cp);
1208 	}
1209 
1210 	IP_VS_DBG_BUF(7, "ADDing control for: "
1211 		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1212 		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1213 		      ntohs(cp->cport),
1214 		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1215 		      ntohs(ctl_cp->cport));
1216 
1217 	cp->control = ctl_cp;
1218 	atomic_inc(&ctl_cp->n_control);
1219 }
1220 
1221 /*
1222  * IPVS netns init & cleanup functions
1223  */
/* per-netns init */
int ip_vs_estimator_net_init(struct net *net);
int ip_vs_control_net_init(struct net *net);
int ip_vs_protocol_net_init(struct net *net);
int ip_vs_app_net_init(struct net *net);
int ip_vs_conn_net_init(struct net *net);
int ip_vs_sync_net_init(struct net *net);

/* per-netns cleanup */
void ip_vs_conn_net_cleanup(struct net *net);
void ip_vs_app_net_cleanup(struct net *net);
void ip_vs_protocol_net_cleanup(struct net *net);
void ip_vs_control_net_cleanup(struct net *net);
void ip_vs_estimator_net_cleanup(struct net *net);
void ip_vs_sync_net_cleanup(struct net *net);
void ip_vs_service_net_cleanup(struct net *net);
1237 
1238 /*
1239  *      IPVS application functions
1240  *      (from ip_vs_app.c)
1241  */
1242 #define IP_VS_APP_MAX_PORTS  8
1243 extern struct ip_vs_app *register_ip_vs_app(struct net *net,
1244 					    struct ip_vs_app *app);
1245 extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
1246 extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
1247 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
1248 extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
1249 				  __u16 proto, __u16 port);
1250 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
1251 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
1252 
1253 extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
1254 extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
1255 
1256 void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
1257 void ip_vs_unbind_pe(struct ip_vs_service *svc);
1258 int register_ip_vs_pe(struct ip_vs_pe *pe);
1259 int unregister_ip_vs_pe(struct ip_vs_pe *pe);
1260 struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
1261 struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
1262 
/*
 * ip_vs_pe_get()/ip_vs_pe_put(): take or drop a reference on
 * (pe)->module when both @pe and (pe)->module are non-NULL; otherwise
 * do nothing.
 *
 * These are #defines so that this header does not have to pull in all
 * of module.h just for two trivial operations.
 *
 * Each macro is wrapped in do { } while (0) and carries no trailing
 * semicolon, so "ip_vs_pe_get(pe);" is exactly one statement and stays
 * safe inside an unbraced if/else.  @pe is parenthesized so that
 * expression arguments expand correctly.  It is still evaluated more
 * than once, so do not pass an expression with side effects.
 */
#define ip_vs_pe_get(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			__module_get((pe)->module);	\
	} while (0)

#define ip_vs_pe_put(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			module_put((pe)->module);	\
	} while (0)
1273 
1274 /*
1275  *	IPVS protocol functions (from ip_vs_proto.c)
1276  */
int ip_vs_protocol_init(void);
void ip_vs_protocol_cleanup(void);
void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
int *ip_vs_create_timeout_table(int *table, int size);
int ip_vs_set_state_timeout(int *table, int num, const char *const *names,
			    const char *name, int to);
void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			       const struct sk_buff *skb,
			       int offset, const char *msg);

/* Objects, not functions: "extern" is required on these declarations */
extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
1295 
1296 /*
1297  *      Registering/unregistering scheduler functions
1298  *      (from ip_vs_sched.c)
1299  */
/* scheduler registration, lookup and service binding */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler);
int ip_vs_unbind_scheduler(struct ip_vs_service *svc);

struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc,
				  struct sk_buff *skb,
				  struct ip_vs_proto_data *pd, int *ignored);
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd);

void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
1314 
1315 
1316 /*
1317  *      IPVS control data and functions (from ip_vs_ctl.c)
1318  */
1319 extern struct ip_vs_stats ip_vs_stats;
1320 extern int sysctl_ip_vs_sync_ver;
1321 
1322 extern struct ip_vs_service *
1323 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
1324 		  const union nf_inet_addr *vaddr, __be16 vport);
1325 
1326 static inline void ip_vs_service_put(struct ip_vs_service *svc)
1327 {
1328 	atomic_dec(&svc->usecnt);
1329 }
1330 
1331 extern struct ip_vs_dest *
1332 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
1333 			  const union nf_inet_addr *daddr, __be16 dport);
1334 
1335 extern int ip_vs_use_count_inc(void);
1336 extern void ip_vs_use_count_dec(void);
1337 extern int ip_vs_register_nl_ioctl(void);
1338 extern void ip_vs_unregister_nl_ioctl(void);
1339 extern int ip_vs_control_init(void);
1340 extern void ip_vs_control_cleanup(void);
1341 extern struct ip_vs_dest *
1342 ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
1343 		__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
1344 		__u16 protocol, __u32 fwmark, __u32 flags);
1345 extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1346 
1347 
1348 /*
1349  *      IPVS sync daemon data and function prototypes
1350  *      (from ip_vs_sync.c)
1351  */
1352 extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
1353 			     __u8 syncid);
1354 extern int stop_sync_thread(struct net *net, int state);
1355 extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
1356 
1357 
1358 /*
1359  *      IPVS rate estimator prototypes (from ip_vs_est.c)
1360  */
void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
			  struct ip_vs_stats *stats);
1366 
1367 /*
1368  *	Various IPVS packet transmitters (from ip_vs_xmit.c)
1369  */
/* The (skb, cp, pp) transmitters share one signature */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp);
int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp);
int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp);
int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp);
int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp);

/* The ICMP variant additionally takes an offset and the hook number */
int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, int offset,
		    unsigned int hooknum);

void ip_vs_dst_reset(struct ip_vs_dest *dest);
1384 
#ifdef CONFIG_IP_VS_IPV6
/* _v6 transmitters, declared only when CONFIG_IP_VS_IPV6 is set */
int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp);
int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp);
int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp);
int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp);
int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		       struct ip_vs_protocol *pp, int offset,
		       unsigned int hooknum);
#endif
1398 
#ifdef CONFIG_SYSCTL
/*
 * Simple mechanism to ignore packets when we are loaded: with
 * ipvs->drop_rate set to a non-zero 'n' we start to drop 1/rate of
 * the packets.
 *
 * Returns 1 when the caller should drop the packet, 0 otherwise.
 * While drop_rate is zero nothing is touched.  Otherwise drop_counter
 * is decremented on every call, and once it is no longer positive it
 * is reloaded from drop_rate and a drop is reported.
 */
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
	if (ipvs->drop_rate && --ipvs->drop_counter <= 0) {
		ipvs->drop_counter = ipvs->drop_rate;
		return 1;
	}

	return 0;
}
#else
/* Without CONFIG_SYSCTL nothing is ever dropped by this mechanism. */
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
	return 0;
}
#endif
1418 
1419 /*
1420  *      ip_vs_fwd_tag returns the forwarding tag of the connection
1421  */
/*
 * Extract the bits of @cp->flags selected by IP_VS_CONN_F_FWD_MASK.
 * The argument is parenthesized so that expressions such as "&conn"
 * or "base + i" expand correctly.
 */
#define IP_VS_FWD_METHOD(cp)  ((cp)->flags & IP_VS_CONN_F_FWD_MASK)
1423 
1424 static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
1425 {
1426 	char fwd;
1427 
1428 	switch (IP_VS_FWD_METHOD(cp)) {
1429 	case IP_VS_CONN_F_MASQ:
1430 		fwd = 'M'; break;
1431 	case IP_VS_CONN_F_LOCALNODE:
1432 		fwd = 'L'; break;
1433 	case IP_VS_CONN_F_TUNNEL:
1434 		fwd = 'T'; break;
1435 	case IP_VS_CONN_F_DROUTE:
1436 		fwd = 'R'; break;
1437 	case IP_VS_CONN_F_BYPASS:
1438 		fwd = 'B'; break;
1439 	default:
1440 		fwd = '?'; break;
1441 	}
1442 	return fwd;
1443 }
1444 
1445 extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
1446 			   struct ip_vs_conn *cp, int dir);
1447 
1448 #ifdef CONFIG_IP_VS_IPV6
1449 extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
1450 			      struct ip_vs_conn *cp, int dir);
1451 #endif
1452 
1453 extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
1454 
1455 static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
1456 {
1457 	__be32 diff[2] = { ~old, new };
1458 
1459 	return csum_partial(diff, sizeof(diff), oldsum);
1460 }
1461 
#ifdef CONFIG_IP_VS_IPV6
/*
 * 16-byte variant: @old and @new each point at four 32-bit words.
 * The buffer handed to csum_partial() holds the complemented old words
 * followed by the new words, both taken from index 3 down to 0.
 */
static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
					__wsum oldsum)
{
	__be32 delta[8];
	int i;

	for (i = 0; i < 4; i++) {
		delta[i] = ~old[3 - i];
		delta[4 + i] = new[3 - i];
	}

	return csum_partial(delta, sizeof(delta), oldsum);
}
#endif
1472 
1473 static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
1474 {
1475 	__be16 diff[2] = { ~old, new };
1476 
1477 	return csum_partial(diff, sizeof(diff), oldsum);
1478 }
1479 
/*
 * Forget the current (unconfirmed) conntrack of @skb and attach the
 * notrack entry instead.
 *
 * Nothing is done when @skb already carries the untracked entry.
 * Otherwise the reference held in skb->nfct is put, skb->nfct is
 * pointed at the untracked entry with nfctinfo set to IP_CT_NEW, and a
 * reference is taken on it.  The function compiles to an empty body
 * when conntrack is neither built in nor modular.
 */
static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	/* already untracked: keep the skb as it is */
	if (ct && nf_ct_is_untracked(ct))
		return;

	nf_conntrack_put(skb->nfct);
	skb->nfct = &nf_ct_untracked_get()->ct_general;
	skb->nfctinfo = IP_CT_NEW;
	nf_conntrack_get(skb->nfct);
#endif
}
1497 
1498 #ifdef CONFIG_IP_VS_NFCT
1499 /*
1500  *      Netfilter connection tracking
1501  *      (from ip_vs_nfct.c)
1502  */
/*
 * Return ipvs->sysctl_conntrack when CONFIG_SYSCTL is set; without
 * CONFIG_SYSCTL the result is always 0.
 */
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
#ifndef CONFIG_SYSCTL
	return 0;
#else
	return ipvs->sysctl_conntrack;
#endif
}
1511 
1512 extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
1513 				   int outin);
1514 extern int ip_vs_confirm_conntrack(struct sk_buff *skb);
1515 extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
1516 				      struct ip_vs_conn *cp, u_int8_t proto,
1517 				      const __be16 port, int from_rs);
1518 extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1519 
1520 #else
1521 
/* Stub for builds without CONFIG_IP_VS_NFCT: always reports 0. */
static inline int
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	return 0;
}
1526 
1527 static inline void ip_vs_update_conntrack(struct sk_buff *skb,
1528 					  struct ip_vs_conn *cp, int outin)
1529 {
1530 }
1531 
1532 static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
1533 {
1534 	return NF_ACCEPT;
1535 }
1536 
1537 static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
1538 {
1539 }
1540 /* CONFIG_IP_VS_NFCT */
1541 #endif
1542 
1543 static inline unsigned int
1544 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
1545 {
1546 	/*
1547 	 * We think the overhead of processing active connections is 256
1548 	 * times higher than that of inactive connections in average. (This
1549 	 * 256 times might not be accurate, we will change it later) We
1550 	 * use the following formula to estimate the overhead now:
1551 	 *		  dest->activeconns*256 + dest->inactconns
1552 	 */
1553 	return (atomic_read(&dest->activeconns) << 8) +
1554 		atomic_read(&dest->inactconns);
1555 }
1556 
1557 #endif	/* _NET_IP_VS_H */
1558