xref: /openbmc/linux/include/net/ip_vs.h (revision ecfb9f40)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* IP Virtual Server
3  * data structure and functionality definitions
4  */
5 
6 #ifndef _NET_IP_VS_H
7 #define _NET_IP_VS_H
8 
9 #include <linux/ip_vs.h>                /* definitions shared with userland */
10 
11 #include <asm/types.h>                  /* for __uXX types */
12 
13 #include <linux/list.h>                 /* for struct list_head */
14 #include <linux/spinlock.h>             /* for struct rwlock_t */
15 #include <linux/atomic.h>               /* for struct atomic_t */
16 #include <linux/refcount.h>             /* for struct refcount_t */
17 #include <linux/workqueue.h>
18 
19 #include <linux/compiler.h>
20 #include <linux/timer.h>
21 #include <linux/bug.h>
22 
23 #include <net/checksum.h>
24 #include <linux/netfilter.h>		/* for union nf_inet_addr */
25 #include <linux/ip.h>
26 #include <linux/ipv6.h>			/* for struct ipv6hdr */
27 #include <net/ipv6.h>
28 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
29 #include <net/netfilter/nf_conntrack.h>
30 #endif
31 #include <net/net_namespace.h>		/* Netw namespace */
32 #include <linux/sched/isolation.h>
33 
/* Bits stored in ip_vs_iphdr.hdr_flags; see ip_vs_iph_inverse() and
 * ip_vs_iph_icmp() for the accessors.
 */
#define IP_VS_HDR_INVERSE	1
#define IP_VS_HDR_ICMP		2
36 
37 /* Generic access of ipvs struct */
38 static inline struct netns_ipvs *net_ipvs(struct net* net)
39 {
40 	return net->ipvs;
41 }
42 
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;

/* Declared here only; the mutex itself is defined outside this header. */
extern struct mutex __ip_vs_mutex;
47 
/* Summary of a packet's IPv4/IPv6 header, as filled in by
 * ip_vs_fill_iph_skb_off() and its wrappers.
 */
struct ip_vs_iphdr {
	int hdr_flags;	/* ipvs flags (IP_VS_HDR_INVERSE, IP_VS_HDR_ICMP) */
	__u32 off;	/* Where IPv6 or IPv4 header starts */
	__u32 len;	/* IPv4 simply where L4 starts
			 * IPv6 where L4 Transport Header starts */
	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
	__s16 protocol;	/* IPv4: iphdr protocol; IPv6: ipv6_find_hdr() result */
	__s32 flags;	/* IPv6: flags updated by ipv6_find_hdr() */
	union nf_inet_addr saddr;	/* source address from the IP header */
	union nf_inet_addr daddr;	/* destination address from the IP header */
};
59 
/* Forward to skb_header_pointer() with the same arguments and hand back
 * whatever it returns.
 */
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
				      int len, void *buffer)
{
	void *hdr = skb_header_pointer(skb, offset, len, buffer);

	return hdr;
}
65 
66 /* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
67  * IPv6 requires some extra work, as finding proper header position,
68  * depend on the IPv6 extension headers.
69  */
70 static inline int
71 ip_vs_fill_iph_skb_off(int af, const struct sk_buff *skb, int offset,
72 		       int hdr_flags, struct ip_vs_iphdr *iphdr)
73 {
74 	iphdr->hdr_flags = hdr_flags;
75 	iphdr->off = offset;
76 
77 #ifdef CONFIG_IP_VS_IPV6
78 	if (af == AF_INET6) {
79 		struct ipv6hdr _iph;
80 		const struct ipv6hdr *iph = skb_header_pointer(
81 			skb, offset, sizeof(_iph), &_iph);
82 		if (!iph)
83 			return 0;
84 
85 		iphdr->saddr.in6 = iph->saddr;
86 		iphdr->daddr.in6 = iph->daddr;
87 		/* ipv6_find_hdr() updates len, flags */
88 		iphdr->len	 = offset;
89 		iphdr->flags	 = 0;
90 		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
91 						 &iphdr->fragoffs,
92 						 &iphdr->flags);
93 		if (iphdr->protocol < 0)
94 			return 0;
95 	} else
96 #endif
97 	{
98 		struct iphdr _iph;
99 		const struct iphdr *iph = skb_header_pointer(
100 			skb, offset, sizeof(_iph), &_iph);
101 		if (!iph)
102 			return 0;
103 
104 		iphdr->len	= offset + iph->ihl * 4;
105 		iphdr->fragoffs	= 0;
106 		iphdr->protocol	= iph->protocol;
107 		iphdr->saddr.ip	= iph->saddr;
108 		iphdr->daddr.ip	= iph->daddr;
109 	}
110 
111 	return 1;
112 }
113 
114 static inline int
115 ip_vs_fill_iph_skb_icmp(int af, const struct sk_buff *skb, int offset,
116 			bool inverse, struct ip_vs_iphdr *iphdr)
117 {
118 	int hdr_flags = IP_VS_HDR_ICMP;
119 
120 	if (inverse)
121 		hdr_flags |= IP_VS_HDR_INVERSE;
122 
123 	return ip_vs_fill_iph_skb_off(af, skb, offset, hdr_flags, iphdr);
124 }
125 
126 static inline int
127 ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, bool inverse,
128 		   struct ip_vs_iphdr *iphdr)
129 {
130 	int hdr_flags = 0;
131 
132 	if (inverse)
133 		hdr_flags |= IP_VS_HDR_INVERSE;
134 
135 	return ip_vs_fill_iph_skb_off(af, skb, skb_network_offset(skb),
136 				      hdr_flags, iphdr);
137 }
138 
139 static inline bool
140 ip_vs_iph_inverse(const struct ip_vs_iphdr *iph)
141 {
142 	return !!(iph->hdr_flags & IP_VS_HDR_INVERSE);
143 }
144 
145 static inline bool
146 ip_vs_iph_icmp(const struct ip_vs_iphdr *iph)
147 {
148 	return !!(iph->hdr_flags & IP_VS_HDR_ICMP);
149 }
150 
151 static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
152 				   const union nf_inet_addr *src)
153 {
154 #ifdef CONFIG_IP_VS_IPV6
155 	if (af == AF_INET6)
156 		dst->in6 = src->in6;
157 	else
158 #endif
159 	dst->ip = src->ip;
160 }
161 
162 static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
163 				  const union nf_inet_addr *src)
164 {
165 #ifdef CONFIG_IP_VS_IPV6
166 	if (af == AF_INET6) {
167 		dst->in6 = src->in6;
168 		return;
169 	}
170 #endif
171 	dst->ip = src->ip;
172 	dst->all[1] = 0;
173 	dst->all[2] = 0;
174 	dst->all[3] = 0;
175 }
176 
177 static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
178 				   const union nf_inet_addr *b)
179 {
180 #ifdef CONFIG_IP_VS_IPV6
181 	if (af == AF_INET6)
182 		return ipv6_addr_equal(&a->in6, &b->in6);
183 #endif
184 	return a->ip == b->ip;
185 }
186 
187 #ifdef CONFIG_IP_VS_DEBUG
188 #include <linux/net.h>
189 
/* Current debug level; the IP_VS_DBG*() macros compare their level
 * argument against the value returned here.
 */
int ip_vs_get_debug_level(void);
191 
192 static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
193 					 const union nf_inet_addr *addr,
194 					 int *idx)
195 {
196 	int len;
197 #ifdef CONFIG_IP_VS_IPV6
198 	if (af == AF_INET6)
199 		len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]",
200 			       &addr->in6) + 1;
201 	else
202 #endif
203 		len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
204 			       &addr->ip) + 1;
205 
206 	*idx += len;
207 	BUG_ON(*idx > buf_len + 1);
208 	return &buf[*idx - len];
209 }
210 
/* Debug print whose arguments may use IP_VS_DBG_ADDR(): declares the
 * scratch buffer and running index that IP_VS_DBG_ADDR() formats into.
 * The message is printed with KERN_DEBUG only when @level does not exceed
 * ip_vs_get_debug_level().  @level is parenthesized so that any expression
 * may be passed.
 */
#define IP_VS_DBG_BUF(level, msg, ...)					\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Same scratch buffer as IP_VS_DBG_BUF(), but prints unconditionally
 * through pr_err().
 */
#define IP_VS_ERR_BUF(msg...)						\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		pr_err(msg);						\
	} while (0)

/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros: relies on
 * the ip_vs_dbg_buf / ip_vs_dbg_idx locals those macros declare.
 */
#define IP_VS_DBG_ADDR(af, addr)					\
	ip_vs_dbg_addr(af, ip_vs_dbg_buf,				\
		       sizeof(ip_vs_dbg_buf), addr,			\
		       &ip_vs_dbg_idx)
230 
/* Print @msg with KERN_DEBUG when @level does not exceed the current
 * debug level.
 */
#define IP_VS_DBG(level, msg, ...)					\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Print @msg with KERN_DEBUG, gated by net_ratelimit() only (no level). */
#define IP_VS_DBG_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Call the debug_packet() hook of protocol @pp when @level does not
 * exceed the current debug level.  @level and @pp are parenthesized so
 * that arbitrary expressions can be passed.
 */
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
/* As IP_VS_DBG_PKT(), additionally gated by net_ratelimit(). */
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level() &&		\
		    net_ratelimit())					\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
252 #else	/* NO DEBUGGING at ALL */
/* Without CONFIG_IP_VS_DEBUG every debug macro expands to an empty
 * statement; the arguments are not evaluated.
 */
#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
#define IP_VS_ERR_BUF(msg...)  do {} while (0)
#define IP_VS_DBG(level, msg...)  do {} while (0)
#define IP_VS_DBG_RL(msg...)  do {} while (0)
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
259 #endif
260 
/* Available regardless of CONFIG_IP_VS_DEBUG. */
#define IP_VS_BUG() BUG()
/* pr_err() gated by net_ratelimit(). */
#define IP_VS_ERR_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			pr_err(msg, ##__VA_ARGS__);			\
	} while (0)
267 
/* Function entry/exit tracing.  With CONFIG_IP_VS_DEBUG each macro prints
 * the function name, file and line at KERN_DEBUG when @level does not
 * exceed the current debug level; otherwise both expand to nothing.
 * @level is parenthesized so that any expression may be passed.
 */
#ifdef CONFIG_IP_VS_DEBUG
#define EnterFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Enter: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#define LeaveFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Leave: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#else
#define EnterFunction(level)   do {} while (0)
#define LeaveFunction(level)   do {} while (0)
#endif
287 
/* The port numbers of the FTP service, converted with cpu_to_be16()
 * (network order): 21 and 20.
 */
#define FTPPORT  cpu_to_be16(21)
#define FTPDATA  cpu_to_be16(20)
291 
/* TCP State Values.
 * Numbering starts at 0; IP_VS_TCP_S_LAST is the final enumerator and so
 * equals the number of states listed before it.
 */
enum {
	IP_VS_TCP_S_NONE = 0,
	IP_VS_TCP_S_ESTABLISHED,
	IP_VS_TCP_S_SYN_SENT,
	IP_VS_TCP_S_SYN_RECV,
	IP_VS_TCP_S_FIN_WAIT,
	IP_VS_TCP_S_TIME_WAIT,
	IP_VS_TCP_S_CLOSE,
	IP_VS_TCP_S_CLOSE_WAIT,
	IP_VS_TCP_S_LAST_ACK,
	IP_VS_TCP_S_LISTEN,
	IP_VS_TCP_S_SYNACK,
	IP_VS_TCP_S_LAST
};
307 
/* UDP State Values: a single state, followed by the _LAST terminator. */
enum {
	IP_VS_UDP_S_NORMAL,
	IP_VS_UDP_S_LAST,
};
313 
/* ICMP State Values: a single state, followed by the _LAST terminator. */
enum {
	IP_VS_ICMP_S_NORMAL,
	IP_VS_ICMP_S_LAST,
};
319 
/* SCTP State Values.
 * Numbering starts at 0; IP_VS_SCTP_S_LAST is the final enumerator and so
 * equals the number of states listed before it.
 */
enum ip_vs_sctp_states {
	IP_VS_SCTP_S_NONE,
	IP_VS_SCTP_S_INIT1,
	IP_VS_SCTP_S_INIT,
	IP_VS_SCTP_S_COOKIE_SENT,
	IP_VS_SCTP_S_COOKIE_REPLIED,
	IP_VS_SCTP_S_COOKIE_WAIT,
	IP_VS_SCTP_S_COOKIE,
	IP_VS_SCTP_S_COOKIE_ECHOED,
	IP_VS_SCTP_S_ESTABLISHED,
	IP_VS_SCTP_S_SHUTDOWN_SENT,
	IP_VS_SCTP_S_SHUTDOWN_RECEIVED,
	IP_VS_SCTP_S_SHUTDOWN_ACK_SENT,
	IP_VS_SCTP_S_REJECTED,
	IP_VS_SCTP_S_CLOSED,
	IP_VS_SCTP_S_LAST
};
338 
/* Connection templates use bits from state.
 * NOTE(review): presumably stored in ip_vs_conn.state for template
 * connections - confirm against the users of these values.
 */
#define IP_VS_CTPL_S_NONE		0x0000
#define IP_VS_CTPL_S_ASSURED		0x0001
#define IP_VS_CTPL_S_LAST		0x0002
343 
/* Delta sequence info structure
 * Each ip_vs_conn has 2 (output AND input seq. changes), see its in_seq
 * and out_seq members.
 * Only used in the VS/NAT.
 */
struct ip_vs_seq {
	__u32			init_seq;	/* Add delta from this seq */
	__u32			delta;		/* Delta in sequence numbers */
	__u32			previous_delta;	/* Delta in sequence numbers
						 * before last resized pkt */
};
354 
/* counters per cpu; embedded in struct ip_vs_cpu_stats */
struct ip_vs_counters {
	u64_stats_t	conns;		/* connections scheduled */
	u64_stats_t	inpkts;		/* incoming packets */
	u64_stats_t	outpkts;	/* outgoing packets */
	u64_stats_t	inbytes;	/* incoming bytes */
	u64_stats_t	outbytes;	/* outgoing bytes */
};
362 };
/* Stats per cpu: the counters plus the u64_stats_sync object that
 * accompanies them.
 */
struct ip_vs_cpu_stats {
	struct ip_vs_counters   cnt;
	struct u64_stats_sync   syncp;
};
368 
/* Default nice value for estimator kthreads */
#define IPVS_EST_NICE		0
371 
/* IPVS statistics objects: estimator state embedded in struct ip_vs_stats.
 * NOTE(review): the last_* members presumably hold the counter values seen
 * at the previous estimation and the remaining u64 members the derived
 * rates (compare struct ip_vs_kstats) - confirm in the estimator code.
 */
struct ip_vs_estimator {
	struct hlist_node	list;

	u64			last_inbytes;
	u64			last_outbytes;
	u64			last_conns;
	u64			last_inpkts;
	u64			last_outpkts;

	u64			cps;
	u64			inpps;
	u64			outpps;
	u64			inbps;
	u64			outbps;

	/* three signed bit-fields packed into one s32 */
	s32			ktid:16,	/* kthread ID, -1=temp list */
				ktrow:8,	/* row/tick ID for kthread */
				ktcid:8;	/* chain ID for kthread tick */
};
392 
/*
 * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user:
 * five totals followed by the five matching current rates.
 */
struct ip_vs_kstats {
	u64			conns;		/* connections scheduled */
	u64			inpkts;		/* incoming packets */
	u64			outpkts;	/* outgoing packets */
	u64			inbytes;	/* incoming bytes */
	u64			outbytes;	/* outgoing bytes */

	u64			cps;		/* current connection rate */
	u64			inpps;		/* current in packet rate */
	u64			outpps;		/* current out packet rate */
	u64			inbps;		/* current in byte rate */
	u64			outbps;		/* current out byte rate */
};
409 
/* Statistics container embedded in services and destinations (see the
 * stats members of struct ip_vs_service and struct ip_vs_dest).
 */
struct ip_vs_stats {
	struct ip_vs_kstats	kstats;		/* kernel statistics */
	struct ip_vs_estimator	est;		/* estimator */
	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
	spinlock_t		lock;		/* spin lock */
	struct ip_vs_kstats	kstats0;	/* reset values */
};
417 
/* struct ip_vs_stats paired with an rcu_head; netns_ipvs.tot_stats points
 * to an object of this type.
 */
struct ip_vs_stats_rcu {
	struct ip_vs_stats	s;
	struct rcu_head		rcu_head;
};
422 
/* Lifetime helpers for struct ip_vs_stats.  Only declared here; see the
 * definitions for the exact allocation and release semantics.
 */
int ip_vs_stats_init_alloc(struct ip_vs_stats *s);
struct ip_vs_stats *ip_vs_stats_alloc(void);
void ip_vs_stats_release(struct ip_vs_stats *stats);
void ip_vs_stats_free(struct ip_vs_stats *stats);
427 
/* Process estimators in multiple timer ticks (20/50/100, see ktrow) */
#define IPVS_EST_NTICKS		50
/* Estimation uses a 2-second period containing ticks (in jiffies):
 * one tick lasts (2 * HZ) / IPVS_EST_NTICKS jiffies.
 */
#define IPVS_EST_TICK		((2 * HZ) / IPVS_EST_NTICKS)
432 
/* Limit of CPU load per kthread (8 for 12.5%), ratio of CPU capacity (1/C).
 * Value of 4 and above ensures kthreads will take work without exceeding
 * the CPU capacity under different circumstances.
 */
#define IPVS_EST_LOAD_DIVISOR	8

/* Kthreads should not have work that exceeds the CPU load above 50%
 * (half of the divisor: 4 with the value above).
 */
#define IPVS_EST_CPU_KTHREADS	(IPVS_EST_LOAD_DIVISOR / 2)
441 
/* Desired number of chains per timer tick (chain load factor in 100us units),
 * 48=4.8ms of 40ms tick (12% CPU usage):
 * 2 sec * 1000 ms in sec * 10 (100us in ms) / 8 (12.5%) / 50
 * The quotient is 50, which ALIGN_DOWN() rounds down to a multiple of 8.
 */
#define IPVS_EST_CHAIN_FACTOR	\
	ALIGN_DOWN(2 * 1000 * 10 / IPVS_EST_LOAD_DIVISOR / IPVS_EST_NTICKS, 8)
448 
/* Compiled number of chains per tick
 * The defines should match cond_resched_rcu
 * Either IPVS_EST_CHAIN_FACTOR chains or a single chain, depending on the
 * configuration tested below.
 */
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
#define IPVS_EST_TICK_CHAINS	IPVS_EST_CHAIN_FACTOR
#else
#define IPVS_EST_TICK_CHAINS	1
#endif
457 
/* ip_vs_estimator.ktrow is a signed 8-bit bit-field, so a tick index
 * above 127 would not fit.
 */
#if IPVS_EST_NTICKS > 127
#error Too many timer ticks for ktrow
#endif
461 
/* Multiple chains processed in same tick.
 * All four members are sized by IPVS_EST_TICK_CHAINS: one list head, one
 * bit in each bitmap and one length counter per chain.
 */
struct ip_vs_est_tick_data {
	struct hlist_head	chains[IPVS_EST_TICK_CHAINS];
	DECLARE_BITMAP(present, IPVS_EST_TICK_CHAINS);
	DECLARE_BITMAP(full, IPVS_EST_TICK_CHAINS);
	int			chain_len[IPVS_EST_TICK_CHAINS];
};
469 
/* Context for estimation kthread.
 * Holds one RCU-annotated tick-data pointer and one estimator count per
 * tick (IPVS_EST_NTICKS entries each), plus the limits and cursors below.
 */
struct ip_vs_est_kt_data {
	struct netns_ipvs	*ipvs;
	struct task_struct	*task;		/* task if running */
	struct ip_vs_est_tick_data __rcu *ticks[IPVS_EST_NTICKS];
	DECLARE_BITMAP(avail, IPVS_EST_NTICKS);	/* tick has space for ests */
	unsigned long		est_timer;	/* estimation timer (jiffies) */
	struct ip_vs_stats	*calc_stats;	/* Used for calculation */
	int			tick_len[IPVS_EST_NTICKS];	/* est count */
	int			id;		/* ktid per netns */
	int			chain_max;	/* max ests per tick chain */
	int			tick_max;	/* max ests per tick */
	int			est_count;	/* attached ests to kthread */
	int			est_max_count;	/* max ests per kthread */
	int			add_row;	/* row for new ests */
	int			est_row;	/* estimated row */
};
487 
/* Forward declarations for types that the definitions below reference
 * through pointers.
 */
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
struct ip_vs_app;
struct sk_buff;
struct ip_vs_proto_data;
494 
/* Descriptor of one transport protocol handled by IPVS: identification
 * fields followed by a table of callbacks.  The per-netns counterpart is
 * struct ip_vs_proto_data, which points back here through ->pp.
 */
struct ip_vs_protocol {
	struct ip_vs_protocol	*next;
	char			*name;
	u16			protocol;
	u16			num_states;
	int			dont_defrag;

	void (*init)(struct ip_vs_protocol *pp);

	void (*exit)(struct ip_vs_protocol *pp);

	/* per network namespace setup/teardown */
	int (*init_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);

	void (*exit_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);

	int (*conn_schedule)(struct netns_ipvs *ipvs,
			     int af, struct sk_buff *skb,
			     struct ip_vs_proto_data *pd,
			     int *verdict, struct ip_vs_conn **cpp,
			     struct ip_vs_iphdr *iph);

	struct ip_vs_conn *
	(*conn_in_get)(struct netns_ipvs *ipvs,
		       int af,
		       const struct sk_buff *skb,
		       const struct ip_vs_iphdr *iph);

	struct ip_vs_conn *
	(*conn_out_get)(struct netns_ipvs *ipvs,
			int af,
			const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph);

	int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	const char *(*state_name)(int state);

	void (*state_transition)(struct ip_vs_conn *cp, int direction,
				 const struct sk_buff *skb,
				 struct ip_vs_proto_data *pd);

	int (*register_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);

	void (*unregister_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);

	int (*app_conn_bind)(struct ip_vs_conn *cp);

	/* invoked by IP_VS_DBG_PKT() and IP_VS_DBG_RL_PKT() */
	void (*debug_packet)(int af, struct ip_vs_protocol *pp,
			     const struct sk_buff *skb,
			     int offset,
			     const char *msg);

	void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
553 
/* protocol data per netns; netns_ipvs.proto_data_table holds pointers to
 * these objects.
 */
struct ip_vs_proto_data {
	struct ip_vs_proto_data	*next;
	struct ip_vs_protocol	*pp;		/* protocol descriptor */
	int			*timeout_table;	/* protocol timeout table */
	atomic_t		appcnt;		/* counter of proto app incs. */
	struct tcp_states_t	*tcp_state_table;
};
562 
/* Lookups by protocol number: the first returns the protocol descriptor,
 * the second the per-netns protocol data.  Only declared here.
 */
struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
struct ip_vs_proto_data *ip_vs_proto_data_get(struct netns_ipvs *ipvs,
					      unsigned short proto);
566 
/* Bundle of connection parameters passed to the persistence engine
 * callbacks (see the struct ip_vs_pe hooks taking a struct
 * ip_vs_conn_param pointer).  The field names mirror those of struct
 * ip_vs_conn: caddr/cport and vaddr/vport.
 */
struct ip_vs_conn_param {
	struct netns_ipvs		*ipvs;
	const union nf_inet_addr	*caddr;
	const union nf_inet_addr	*vaddr;
	__be16				cport;
	__be16				vport;
	__u16				protocol;
	u16				af;

	const struct ip_vs_pe		*pe;
	char				*pe_data;
	__u8				pe_data_len;
};
580 
/* IP_VS structure allocated for each dynamically scheduled connection */
struct ip_vs_conn {
	struct hlist_node	c_list;         /* hashed list heads */
	/* Protocol, addresses and port numbers */
	__be16                  cport;
	__be16                  dport;
	__be16                  vport;
	u16			af;		/* address family */
	union nf_inet_addr      caddr;          /* client address */
	union nf_inet_addr      vaddr;          /* virtual address */
	union nf_inet_addr      daddr;          /* destination address */
	volatile __u32          flags;          /* status flags */
	__u16                   protocol;       /* Which protocol (TCP/UDP) */
	__u16			daf;		/* Address family of the dest */
	struct netns_ipvs	*ipvs;

	/* counter and timer */
	refcount_t		refcnt;		/* reference count */
	struct timer_list	timer;		/* Expiration timer */
	volatile unsigned long	timeout;	/* timeout */

	/* Flags and state transition */
	spinlock_t              lock;           /* lock for state transition */
	volatile __u16          state;          /* state info */
	volatile __u16          old_state;      /* old state, to be used for
						 * state transition triggered
						 * synchronization
						 */
	__u32			fwmark;		/* Firewall mark from skb */
	unsigned long		sync_endtime;	/* jiffies + sent_retries */

	/* Control members */
	struct ip_vs_conn       *control;       /* Master control connection */
	atomic_t                n_control;      /* Number of controlled ones */
	struct ip_vs_dest       *dest;          /* real server */
	atomic_t                in_pkts;        /* incoming packet counter */

	/* Packet transmitter for different forwarding methods.  If it
	 * mangles the packet, it must return NF_DROP or better NF_STOLEN,
	 * otherwise this must be changed to a sk_buff **.
	 * NF_ACCEPT can be returned when destination is local.
	 */
	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
			   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

	/* Note: we can group the following members into a structure,
	 * in order to save more space, and the following members are
	 * only used in VS/NAT anyway
	 */
	struct ip_vs_app        *app;           /* bound ip_vs_app object */
	void                    *app_data;      /* Application private data */
	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */

	/* persistence engine and its data */
	const struct ip_vs_pe	*pe;
	char			*pe_data;
	__u8			pe_data_len;

	struct rcu_head		rcu_head;
};
641 
/* Extended internal versions of struct ip_vs_service_user and ip_vs_dest_user
 * for IPv6 support.
 *
 * We need these to conveniently pass around service and destination
 * options, but unfortunately, we also need to keep the old definitions to
 * maintain userspace backwards compatibility for the setsockopt interface.
 */
struct ip_vs_service_user_kern {
	/* virtual service addresses */
	u16			af;
	u16			protocol;
	union nf_inet_addr	addr;		/* virtual ip address */
	__be16			port;
	u32			fwmark;		/* firewall mark of service */

	/* virtual service options */
	char			*sched_name;
	char			*pe_name;
	unsigned int		flags;		/* virtual service flags */
	unsigned int		timeout;	/* persistent timeout in sec */
	__be32			netmask;	/* persistent netmask or plen */
};
664 
665 
/* Kernel-internal description of a destination (real server) and its
 * options; companion of struct ip_vs_service_user_kern.
 */
struct ip_vs_dest_user_kern {
	/* destination server address */
	union nf_inet_addr	addr;
	__be16			port;

	/* real server options */
	unsigned int		conn_flags;	/* connection flags */
	int			weight;		/* destination weight */

	/* thresholds for active connections */
	u32			u_threshold;	/* upper threshold */
	u32			l_threshold;	/* lower threshold */

	/* Address family of addr */
	u16			af;

	u16			tun_type;	/* tunnel type */
	__be16			tun_port;	/* tunnel port */
	u16			tun_flags;	/* tunnel flags */
};
686 
687 
/*
 * The information about the virtual service offered to the net and the
 * forwarding entries.
 * Note the units: timeout here is in ticks, whereas
 * ip_vs_service_user_kern.timeout is in seconds.
 */
struct ip_vs_service {
	struct hlist_node	s_list;   /* for normal service table */
	struct hlist_node	f_list;   /* for fwmark-based service table */
	atomic_t		refcnt;   /* reference counter */

	u16			af;       /* address family */
	__u16			protocol; /* which protocol (TCP/UDP) */
	union nf_inet_addr	addr;	  /* IP address for virtual service */
	__be16			port;	  /* port number for the service */
	__u32                   fwmark;   /* firewall mark of the service */
	unsigned int		flags;	  /* service status flags */
	unsigned int		timeout;  /* persistent timeout in ticks */
	__be32			netmask;  /* grouping granularity, mask/plen */
	struct netns_ipvs	*ipvs;

	struct list_head	destinations;  /* real server d-linked list */
	__u32			num_dests;     /* number of servers */
	struct ip_vs_stats      stats;         /* statistics for the service */

	/* for scheduling */
	struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
	spinlock_t		sched_lock;    /* lock sched_data */
	void			*sched_data;   /* scheduler application data */

	/* alternate persistence engine */
	struct ip_vs_pe __rcu	*pe;
	int			conntrack_afmask;

	struct rcu_head		rcu_head;
};
722 
/* Information for cached dst; referenced from ip_vs_dest.dest_dst */
struct ip_vs_dest_dst {
	struct dst_entry	*dst_cache;	/* destination cache entry */
	u32			dst_cookie;
	union nf_inet_addr	dst_saddr;
	struct rcu_head		rcu_head;
};
730 
/* The real server destination forwarding entry with ip address, port number,
 * and so on.
 */
struct ip_vs_dest {
	struct list_head	n_list;   /* for the dests in the service */
	struct hlist_node	d_list;   /* for table with all the dests */

	u16			af;		/* address family */
	__be16			port;		/* port number of the server */
	union nf_inet_addr	addr;		/* IP address of the server */
	volatile unsigned int	flags;		/* dest status flags */
	atomic_t		conn_flags;	/* flags to copy to conn */
	atomic_t		weight;		/* server weight */
	atomic_t		last_weight;	/* server latest weight */
	__u16			tun_type;	/* tunnel type */
	__be16			tun_port;	/* tunnel port */
	__u16			tun_flags;	/* tunnel flags */

	refcount_t		refcnt;		/* reference counter */
	struct ip_vs_stats      stats;          /* statistics */
	unsigned long		idle_start;	/* start time, jiffies */

	/* connection counters and thresholds */
	atomic_t		activeconns;	/* active connections */
	atomic_t		inactconns;	/* inactive connections */
	atomic_t		persistconns;	/* persistent connections */
	__u32			u_threshold;	/* upper threshold */
	__u32			l_threshold;	/* lower threshold */

	/* for destination cache */
	spinlock_t		dst_lock;	/* lock of dst_cache */
	struct ip_vs_dest_dst __rcu *dest_dst;	/* cached dst info */

	/* for virtual service */
	struct ip_vs_service __rcu *svc;	/* service it belongs to */
	__u16			protocol;	/* which protocol (TCP/UDP) */
	__be16			vport;		/* virtual port number */
	union nf_inet_addr	vaddr;		/* virtual IP address */
	__u32			vfwmark;	/* firewall mark of service */

	struct rcu_head		rcu_head;
	struct list_head	t_list;		/* in dest_trash */
	unsigned int		in_rs_table:1;	/* we are in rs_table */
};
775 
/* The scheduler object: identification plus the callbacks a scheduler
 * provides.  A service refers to its scheduler through
 * ip_vs_service.scheduler.
 */
struct ip_vs_scheduler {
	struct list_head	n_list;		/* d-linked list head */
	char			*name;		/* scheduler name */
	atomic_t		refcnt;		/* reference counter */
	struct module		*module;	/* THIS_MODULE/NULL */

	/* scheduler initializing service */
	int (*init_service)(struct ip_vs_service *svc);
	/* scheduling service finish */
	void (*done_service)(struct ip_vs_service *svc);
	/* dest is linked */
	int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is unlinked */
	int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is updated */
	int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);

	/* selecting a server from the given service */
	struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
				       const struct sk_buff *skb,
				       struct ip_vs_iphdr *iph);
};
799 
/* The persistence engine object: identification plus the callbacks a
 * persistence engine provides.  Referenced from ip_vs_service.pe,
 * ip_vs_conn.pe and ip_vs_conn_param.pe.
 */
struct ip_vs_pe {
	struct list_head	n_list;		/* d-linked list head */
	char			*name;		/* persistence engine name */
	atomic_t		refcnt;		/* reference counter */
	struct module		*module;	/* THIS_MODULE/NULL */

	/* get the connection template, if any */
	int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
	bool (*ct_match)(const struct ip_vs_conn_param *p,
			 struct ip_vs_conn *ct);
	u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
			   bool inverse);
	int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
	/* create connections for real-server outgoing packets */
	struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc,
				       struct ip_vs_dest *dest,
				       struct sk_buff *skb,
				       const struct ip_vs_iphdr *iph,
				       __be16 dport, __be16 cport);
};
821 
/* The application module object (a.k.a. app incarnation).
 * The same structure describes both an application and its incarnations:
 * an application keeps its incarnations on incs_list, and an incarnation
 * points back to its application through ->app.
 */
struct ip_vs_app {
	struct list_head	a_list;		/* member in app list */
	int			type;		/* IP_VS_APP_TYPE_xxx */
	char			*name;		/* application module name */
	__u16			protocol;
	struct module		*module;	/* THIS_MODULE/NULL */
	struct list_head	incs_list;	/* list of incarnations */

	/* members for application incarnations */
	struct list_head	p_list;		/* member in proto app list */
	struct ip_vs_app	*app;		/* its real application */
	__be16			port;		/* port number in net order */
	atomic_t		usecnt;		/* usage counter */
	struct rcu_head		rcu_head;

	/* output hook: Process packet in inout direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
	int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
		       struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);

	/* input hook: Process packet in outin direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
	int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
		      struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);

	/* ip_vs_app initializer */
	int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);

	/* ip_vs_app finish */
	int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);


	/* not used now */
	int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *,
			 struct ip_vs_protocol *);

	void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *);

	int *			timeout_table;
	int *			timeouts;
	int			timeouts_size;

	int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
			     int *verdict, struct ip_vs_conn **cpp);

	struct ip_vs_conn *
	(*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
		       const struct iphdr *iph, int inverse);

	struct ip_vs_conn *
	(*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
			const struct iphdr *iph, int inverse);

	int (*state_transition)(struct ip_vs_conn *cp, int direction,
				const struct sk_buff *skb,
				struct ip_vs_app *app);

	void (*timeout_change)(struct ip_vs_app *app, int flags);
};
886 
/* State of a master sync instance: a queue with its length and delay
 * counters, the sync buffer pointer, and a delayed work item named for
 * waking the master.
 * NOTE(review): exact producer/consumer roles are not visible in this
 * header - see the sync code.
 */
struct ipvs_master_sync_state {
	struct list_head	sync_queue;
	struct ip_vs_sync_buff	*sync_buff;
	unsigned long		sync_queue_len;
	unsigned int		sync_queue_delay;
	struct delayed_work	master_wakeup_work;
	struct netns_ipvs	*ipvs;		/* owning netns state */
};
895 
/* Forward declaration only; the type is not defined in the part of this
 * header shown up to here.
 */
struct ip_vs_sync_thread_data;
897 
/* How much time to keep dests in trash: 120 seconds, expressed in jiffies */
#define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)
900 
/* Configuration of a sync daemon: multicast group/port/family/TTL, sync
 * id, maximum sync message length and the interface name.
 */
struct ipvs_sync_daemon_cfg {
	union nf_inet_addr	mcast_group;
	int			syncid;
	u16			sync_maxlen;
	u16			mcast_port;
	u8			mcast_af;
	u8			mcast_ttl;
	/* multicast interface name */
	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
};
911 
912 /* IPVS in network namespace */
913 struct netns_ipvs {
914 	int			gen;		/* Generation */
915 	int			enable;		/* enable like nf_hooks do */
916 	/* Hash table: for real service lookups */
917 	#define IP_VS_RTAB_BITS 4
918 	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
919 	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
920 
921 	struct hlist_head	rs_table[IP_VS_RTAB_SIZE];
922 	/* ip_vs_app */
923 	struct list_head	app_list;
924 	/* ip_vs_proto */
925 	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
926 	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
927 	/* ip_vs_proto_tcp */
928 #ifdef CONFIG_IP_VS_PROTO_TCP
929 	#define	TCP_APP_TAB_BITS	4
930 	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
931 	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
932 	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
933 #endif
934 	/* ip_vs_proto_udp */
935 #ifdef CONFIG_IP_VS_PROTO_UDP
936 	#define	UDP_APP_TAB_BITS	4
937 	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
938 	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
939 	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
940 #endif
941 	/* ip_vs_proto_sctp */
942 #ifdef CONFIG_IP_VS_PROTO_SCTP
943 	#define SCTP_APP_TAB_BITS	4
944 	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
945 	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
946 	/* Hash table for SCTP application incarnations	 */
947 	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
948 #endif
949 	/* ip_vs_conn */
950 	atomic_t		conn_count;      /* connection counter */
951 
952 	/* ip_vs_ctl */
953 	struct ip_vs_stats_rcu	*tot_stats;      /* Statistics & est. */
954 
955 	int			num_services;    /* no of virtual services */
956 	int			num_services6;   /* IPv6 virtual services */
957 
958 	/* Trash for destinations */
959 	struct list_head	dest_trash;
960 	spinlock_t		dest_trash_lock;
961 	struct timer_list	dest_trash_timer; /* expiration timer */
962 	/* Service counters */
963 	atomic_t		ftpsvc_counter;
964 	atomic_t		nullsvc_counter;
965 	atomic_t		conn_out_counter;
966 
967 #ifdef CONFIG_SYSCTL
968 	/* delayed work for expiring no dest connections */
969 	struct delayed_work	expire_nodest_conn_work;
970 	/* 1/rate drop and drop-entry variables */
971 	struct delayed_work	defense_work;   /* Work handler */
972 	int			drop_rate;
973 	int			drop_counter;
974 	int			old_secure_tcp;
975 	atomic_t		dropentry;
976 	/* locks in ctl.c */
977 	spinlock_t		dropentry_lock;  /* drop entry handling */
978 	spinlock_t		droppacket_lock; /* drop packet handling */
979 	spinlock_t		securetcp_lock;  /* state and timeout tables */
980 
981 	/* sys-ctl struct */
982 	struct ctl_table_header	*sysctl_hdr;
983 	struct ctl_table	*sysctl_tbl;
984 #endif
985 
986 	/* sysctl variables */
987 	int			sysctl_amemthresh;
988 	int			sysctl_am_droprate;
989 	int			sysctl_drop_entry;
990 	int			sysctl_drop_packet;
991 	int			sysctl_secure_tcp;
992 #ifdef CONFIG_IP_VS_NFCT
993 	int			sysctl_conntrack;
994 #endif
995 	int			sysctl_snat_reroute;
996 	int			sysctl_sync_ver;
997 	int			sysctl_sync_ports;
998 	int			sysctl_sync_persist_mode;
999 	unsigned long		sysctl_sync_qlen_max;
1000 	int			sysctl_sync_sock_size;
1001 	int			sysctl_cache_bypass;
1002 	int			sysctl_expire_nodest_conn;
1003 	int			sysctl_sloppy_tcp;
1004 	int			sysctl_sloppy_sctp;
1005 	int			sysctl_expire_quiescent_template;
1006 	int			sysctl_sync_threshold[2];
1007 	unsigned int		sysctl_sync_refresh_period;
1008 	int			sysctl_sync_retries;
1009 	int			sysctl_nat_icmp_send;
1010 	int			sysctl_pmtu_disc;
1011 	int			sysctl_backup_only;
1012 	int			sysctl_conn_reuse_mode;
1013 	int			sysctl_schedule_icmp;
1014 	int			sysctl_ignore_tunneled;
1015 	int			sysctl_run_estimation;
1016 #ifdef CONFIG_SYSCTL
1017 	cpumask_var_t		sysctl_est_cpulist;	/* kthread cpumask */
1018 	int			est_cpulist_valid;	/* cpulist set */
1019 	int			sysctl_est_nice;	/* kthread nice */
1020 	int			est_stopped;		/* stop tasks */
1021 #endif
1022 
1023 	/* ip_vs_lblc */
1024 	int			sysctl_lblc_expiration;
1025 	struct ctl_table_header	*lblc_ctl_header;
1026 	struct ctl_table	*lblc_ctl_table;
1027 	/* ip_vs_lblcr */
1028 	int			sysctl_lblcr_expiration;
1029 	struct ctl_table_header	*lblcr_ctl_header;
1030 	struct ctl_table	*lblcr_ctl_table;
1031 	/* ip_vs_est */
1032 	struct delayed_work	est_reload_work;/* Reload kthread tasks */
1033 	struct mutex		est_mutex;	/* protect kthread tasks */
1034 	struct hlist_head	est_temp_list;	/* Ests during calc phase */
1035 	struct ip_vs_est_kt_data **est_kt_arr;	/* Array of kthread data ptrs */
1036 	unsigned long		est_max_threads;/* Hard limit of kthreads */
1037 	int			est_calc_phase;	/* Calculation phase */
1038 	int			est_chain_max;	/* Calculated chain_max */
1039 	int			est_kt_count;	/* Allocated ptrs */
1040 	int			est_add_ktid;	/* ktid where to add ests */
1041 	atomic_t		est_genid;	/* kthreads reload genid */
1042 	atomic_t		est_genid_done;	/* applied genid */
1043 	/* ip_vs_sync */
1044 	spinlock_t		sync_lock;
1045 	struct ipvs_master_sync_state *ms;
1046 	spinlock_t		sync_buff_lock;
1047 	struct ip_vs_sync_thread_data *master_tinfo;
1048 	struct ip_vs_sync_thread_data *backup_tinfo;
1049 	int			threads_mask;
1050 	volatile int		sync_state;
1051 	struct mutex		sync_mutex;
1052 	struct ipvs_sync_daemon_cfg	mcfg;	/* Master Configuration */
1053 	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */
1054 	/* net name space ptr */
1055 	struct net		*net;            /* Needed by timer routines */
1056 	/* Number of heterogeneous destinations, needed becaus heterogeneous
1057 	 * are not supported when synchronization is enabled.
1058 	 */
1059 	unsigned int		mixed_address_family_dests;
1060 	unsigned int		hooks_afmask;	/* &1=AF_INET, &2=AF_INET6 */
1061 };
1062 
/* Values returned by the sysctl_*() helpers when CONFIG_SYSCTL is off */
#define DEFAULT_SYNC_THRESHOLD		3
#define DEFAULT_SYNC_PERIOD		50
#define DEFAULT_SYNC_VER		1
#define DEFAULT_SLOPPY_TCP		0
#define DEFAULT_SLOPPY_SCTP		0
#define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
#define DEFAULT_SYNC_RETRIES		0

/* Sync constants */
#define IPVS_SYNC_WAKEUP_RATE		8
#define IPVS_SYNC_QLEN_MAX		(IPVS_SYNC_WAKEUP_RATE * 4)
#define IPVS_SYNC_SEND_DELAY		(HZ / 50)
#define IPVS_SYNC_CHECK_PERIOD		HZ
#define IPVS_SYNC_FLUSH_TIME		(HZ * 2)
#define IPVS_SYNC_PORTS_MAX		(1 << 6)
1076 
1077 #ifdef CONFIG_SYSCTL
1078 
1079 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1080 {
1081 	return ipvs->sysctl_sync_threshold[0];
1082 }
1083 
1084 static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1085 {
1086 	return READ_ONCE(ipvs->sysctl_sync_threshold[1]);
1087 }
1088 
1089 static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1090 {
1091 	return READ_ONCE(ipvs->sysctl_sync_refresh_period);
1092 }
1093 
1094 static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1095 {
1096 	return ipvs->sysctl_sync_retries;
1097 }
1098 
1099 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1100 {
1101 	return ipvs->sysctl_sync_ver;
1102 }
1103 
1104 static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1105 {
1106 	return ipvs->sysctl_sloppy_tcp;
1107 }
1108 
1109 static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1110 {
1111 	return ipvs->sysctl_sloppy_sctp;
1112 }
1113 
1114 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1115 {
1116 	return READ_ONCE(ipvs->sysctl_sync_ports);
1117 }
1118 
1119 static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
1120 {
1121 	return ipvs->sysctl_sync_persist_mode;
1122 }
1123 
1124 static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1125 {
1126 	return ipvs->sysctl_sync_qlen_max;
1127 }
1128 
1129 static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1130 {
1131 	return ipvs->sysctl_sync_sock_size;
1132 }
1133 
1134 static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1135 {
1136 	return ipvs->sysctl_pmtu_disc;
1137 }
1138 
1139 static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
1140 {
1141 	return ipvs->sync_state & IP_VS_STATE_BACKUP &&
1142 	       ipvs->sysctl_backup_only;
1143 }
1144 
1145 static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
1146 {
1147 	return ipvs->sysctl_conn_reuse_mode;
1148 }
1149 
1150 static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
1151 {
1152 	return ipvs->sysctl_expire_nodest_conn;
1153 }
1154 
1155 static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
1156 {
1157 	return ipvs->sysctl_schedule_icmp;
1158 }
1159 
1160 static inline int sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
1161 {
1162 	return ipvs->sysctl_ignore_tunneled;
1163 }
1164 
1165 static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
1166 {
1167 	return ipvs->sysctl_cache_bypass;
1168 }
1169 
1170 static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
1171 {
1172 	return ipvs->sysctl_run_estimation;
1173 }
1174 
1175 static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
1176 {
1177 	if (ipvs->est_cpulist_valid)
1178 		return ipvs->sysctl_est_cpulist;
1179 	else
1180 		return housekeeping_cpumask(HK_TYPE_KTHREAD);
1181 }
1182 
1183 static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
1184 {
1185 	return ipvs->sysctl_est_nice;
1186 }
1187 
1188 #else
1189 
1190 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1191 {
1192 	return DEFAULT_SYNC_THRESHOLD;
1193 }
1194 
1195 static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1196 {
1197 	return DEFAULT_SYNC_PERIOD;
1198 }
1199 
1200 static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1201 {
1202 	return DEFAULT_SYNC_REFRESH_PERIOD;
1203 }
1204 
1205 static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1206 {
1207 	return DEFAULT_SYNC_RETRIES & 3;
1208 }
1209 
1210 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1211 {
1212 	return DEFAULT_SYNC_VER;
1213 }
1214 
1215 static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1216 {
1217 	return DEFAULT_SLOPPY_TCP;
1218 }
1219 
1220 static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1221 {
1222 	return DEFAULT_SLOPPY_SCTP;
1223 }
1224 
/* !CONFIG_SYSCTL: always 1 */
static inline int
sysctl_sync_ports(struct netns_ipvs *ipvs)
{
	return 1;
}
1229 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
{
	return 0;
}
1234 
1235 static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1236 {
1237 	return IPVS_SYNC_QLEN_MAX;
1238 }
1239 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_sync_sock_size(struct netns_ipvs *ipvs)
{
	return 0;
}
1244 
/* !CONFIG_SYSCTL: always 1 */
static inline int
sysctl_pmtu_disc(struct netns_ipvs *ipvs)
{
	return 1;
}
1249 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_backup_only(struct netns_ipvs *ipvs)
{
	return 0;
}
1254 
/* !CONFIG_SYSCTL: always 1 */
static inline int
sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
	return 1;
}
1259 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
{
	return 0;
}
1264 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_schedule_icmp(struct netns_ipvs *ipvs)
{
	return 0;
}
1269 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
{
	return 0;
}
1274 
/* !CONFIG_SYSCTL: always 0 */
static inline int
sysctl_cache_bypass(struct netns_ipvs *ipvs)
{
	return 0;
}
1279 
/* !CONFIG_SYSCTL: always 1 */
static inline int
sysctl_run_estimation(struct netns_ipvs *ipvs)
{
	return 1;
}
1284 
1285 static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
1286 {
1287 	return housekeeping_cpumask(HK_TYPE_KTHREAD);
1288 }
1289 
1290 static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
1291 {
1292 	return IPVS_EST_NICE;
1293 }
1294 
1295 #endif
1296 
1297 /* IPVS core functions
1298  * (from ip_vs_core.c)
1299  */
1300 const char *ip_vs_proto_name(unsigned int proto);
1301 void ip_vs_init_hash_table(struct list_head *table, int rows);
1302 struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
1303 				      struct ip_vs_dest *dest,
1304 				      struct sk_buff *skb,
1305 				      const struct ip_vs_iphdr *iph,
1306 				      __be16 dport,
1307 				      __be16 cport);
1308 #define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))
1309 
1310 #define IP_VS_APP_TYPE_FTP	1
1311 
/* ip_vs_conn handling functions (from ip_vs_conn.c) */

/* Direction codes; IP_VS_DIR_LAST is the number of real directions */
enum {
	IP_VS_DIR_INPUT		= 0,
	IP_VS_DIR_OUTPUT	= 1,
	IP_VS_DIR_INPUT_ONLY	= 2,
	IP_VS_DIR_LAST		= 3,
};
1321 
1322 static inline void ip_vs_conn_fill_param(struct netns_ipvs *ipvs, int af, int protocol,
1323 					 const union nf_inet_addr *caddr,
1324 					 __be16 cport,
1325 					 const union nf_inet_addr *vaddr,
1326 					 __be16 vport,
1327 					 struct ip_vs_conn_param *p)
1328 {
1329 	p->ipvs = ipvs;
1330 	p->af = af;
1331 	p->protocol = protocol;
1332 	p->caddr = caddr;
1333 	p->cport = cport;
1334 	p->vaddr = vaddr;
1335 	p->vport = vport;
1336 	p->pe = NULL;
1337 	p->pe_data = NULL;
1338 }
1339 
/* Connection lookups by filled-in parameters */
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);

/* Connection lookups taking the skb and its parsed IP header */
struct ip_vs_conn *
ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
			const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph);

struct ip_vs_conn *
ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
			 const struct sk_buff *skb,
			 const struct ip_vs_iphdr *iph);
1352 
1353 /* Get reference to gain full access to conn.
1354  * By default, RCU read-side critical sections have access only to
1355  * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
1356  */
1357 static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
1358 {
1359 	return refcount_inc_not_zero(&cp->refcnt);
1360 }
1361 
1362 /* put back the conn without restarting its timer */
1363 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
1364 {
1365 	smp_mb__before_atomic();
1366 	refcount_dec(&cp->refcnt);
1367 }
1368 void ip_vs_conn_put(struct ip_vs_conn *cp);
1369 void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
1370 
1371 struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
1372 				  const union nf_inet_addr *daddr,
1373 				  __be16 dport, unsigned int flags,
1374 				  struct ip_vs_dest *dest, __u32 fwmark);
1375 void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
1376 
1377 const char *ip_vs_state_name(const struct ip_vs_conn *cp);
1378 
1379 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
1380 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
1381 void ip_vs_random_dropentry(struct netns_ipvs *ipvs);
1382 int ip_vs_conn_init(void);
1383 void ip_vs_conn_cleanup(void);
1384 
1385 static inline void ip_vs_control_del(struct ip_vs_conn *cp)
1386 {
1387 	struct ip_vs_conn *ctl_cp = cp->control;
1388 	if (!ctl_cp) {
1389 		IP_VS_ERR_BUF("request control DEL for uncontrolled: "
1390 			      "%s:%d to %s:%d\n",
1391 			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1392 			      ntohs(cp->cport),
1393 			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1394 			      ntohs(cp->vport));
1395 
1396 		return;
1397 	}
1398 
1399 	IP_VS_DBG_BUF(7, "DELeting control for: "
1400 		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1401 		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1402 		      ntohs(cp->cport),
1403 		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1404 		      ntohs(ctl_cp->cport));
1405 
1406 	cp->control = NULL;
1407 	if (atomic_read(&ctl_cp->n_control) == 0) {
1408 		IP_VS_ERR_BUF("BUG control DEL with n=0 : "
1409 			      "%s:%d to %s:%d\n",
1410 			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1411 			      ntohs(cp->cport),
1412 			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1413 			      ntohs(cp->vport));
1414 
1415 		return;
1416 	}
1417 	atomic_dec(&ctl_cp->n_control);
1418 }
1419 
1420 static inline void
1421 ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
1422 {
1423 	if (cp->control) {
1424 		IP_VS_ERR_BUF("request control ADD for already controlled: "
1425 			      "%s:%d to %s:%d\n",
1426 			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1427 			      ntohs(cp->cport),
1428 			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1429 			      ntohs(cp->vport));
1430 
1431 		ip_vs_control_del(cp);
1432 	}
1433 
1434 	IP_VS_DBG_BUF(7, "ADDing control for: "
1435 		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1436 		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1437 		      ntohs(cp->cport),
1438 		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1439 		      ntohs(ctl_cp->cport));
1440 
1441 	cp->control = ctl_cp;
1442 	atomic_inc(&ctl_cp->n_control);
1443 }
1444 
1445 /* Mark our template as assured */
1446 static inline void
1447 ip_vs_control_assure_ct(struct ip_vs_conn *cp)
1448 {
1449 	struct ip_vs_conn *ct = cp->control;
1450 
1451 	if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
1452 	    (ct->flags & IP_VS_CONN_F_TEMPLATE))
1453 		ct->state |= IP_VS_CTPL_S_ASSURED;
1454 }
1455 
/* IPVS netns init functions */
int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
int ip_vs_control_net_init(struct netns_ipvs *ipvs);
int ip_vs_protocol_net_init(struct netns_ipvs *ipvs);
int ip_vs_app_net_init(struct netns_ipvs *ipvs);
int ip_vs_conn_net_init(struct netns_ipvs *ipvs);
int ip_vs_sync_net_init(struct netns_ipvs *ipvs);

/* IPVS netns cleanup functions */
void ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_app_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_service_nets_cleanup(struct list_head *net_list);
1470 
1471 /* IPVS application functions
1472  * (from ip_vs_app.c)
1473  */
1474 #define IP_VS_APP_MAX_PORTS  8
1475 struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app);
1476 void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app);
1477 int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
1478 void ip_vs_unbind_app(struct ip_vs_conn *cp);
1479 int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
1480 			   __u16 port);
1481 int ip_vs_app_inc_get(struct ip_vs_app *inc);
1482 void ip_vs_app_inc_put(struct ip_vs_app *inc);
1483 
1484 int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb,
1485 		      struct ip_vs_iphdr *ipvsh);
1486 int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb,
1487 		     struct ip_vs_iphdr *ipvsh);
1488 
/* Persistence engine (pe) registration and lookup by name */
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
1493 
/* Use a #define to avoid all of module.h just for these trivial ops.
 *
 * ip_vs_pe_get(pe): take a module reference when pe and pe->module are set.
 * ip_vs_pe_put(pe): drop that reference under the same condition.
 *
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement (safe in an unbraced if/else) and the argument is parenthesized.
 * Note that pe is still evaluated more than once, so do not pass an
 * expression with side effects.
 */
#define ip_vs_pe_get(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			__module_get((pe)->module);	\
	} while (0)

#define ip_vs_pe_put(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			module_put((pe)->module);	\
	} while (0)
1502 
/* IPVS protocol functions (from ip_vs_proto.c) */
int ip_vs_protocol_init(void);
void ip_vs_protocol_cleanup(void);
void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
int *ip_vs_create_timeout_table(int *table, int size);
void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			       const struct sk_buff *skb, int offset,
			       const char *msg);

/* Protocol handler objects, defined outside this header */
extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
1518 
/* Registering/unregistering scheduler functions (from ip_vs_sched.c) */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);

int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler);
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
			    struct ip_vs_scheduler *sched);

struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);

struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc,
				  struct sk_buff *skb,
				  struct ip_vs_proto_data *pd, int *ignored,
				  struct ip_vs_iphdr *iph);
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph);

void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
1538 
1539 /* IPVS control data and functions (from ip_vs_ctl.c) */
1540 extern struct ip_vs_stats ip_vs_stats;
1541 extern int sysctl_ip_vs_sync_ver;
1542 
1543 struct ip_vs_service *
1544 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
1545 		  const union nf_inet_addr *vaddr, __be16 vport);
1546 
1547 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
1548 			    const union nf_inet_addr *daddr, __be16 dport);
1549 
1550 struct ip_vs_dest *
1551 ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
1552 			const union nf_inet_addr *daddr, __be16 dport);
1553 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
1554 				     const union nf_inet_addr *daddr,
1555 				     __be16 tun_port);
1556 
1557 int ip_vs_use_count_inc(void);
1558 void ip_vs_use_count_dec(void);
1559 int ip_vs_register_nl_ioctl(void);
1560 void ip_vs_unregister_nl_ioctl(void);
1561 int ip_vs_control_init(void);
1562 void ip_vs_control_cleanup(void);
1563 struct ip_vs_dest *
1564 ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
1565 		const union nf_inet_addr *daddr, __be16 dport,
1566 		const union nf_inet_addr *vaddr, __be16 vport,
1567 		__u16 protocol, __u32 fwmark, __u32 flags);
1568 void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1569 
1570 static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
1571 {
1572 	refcount_inc(&dest->refcnt);
1573 }
1574 
1575 static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
1576 {
1577 	smp_mb__before_atomic();
1578 	refcount_dec(&dest->refcnt);
1579 }
1580 
1581 static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
1582 {
1583 	if (refcount_dec_and_test(&dest->refcnt))
1584 		kfree(dest);
1585 }
1586 
/* IPVS sync daemon data and function prototypes (from ip_vs_sync.c) */
int start_sync_thread(struct netns_ipvs *ipvs,
		      struct ipvs_sync_daemon_cfg *cfg, int state);
int stop_sync_thread(struct netns_ipvs *ipvs, int state);
void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
		     int pkts);
1594 
/* IPVS rate estimator prototypes (from ip_vs_est.c) */
int ip_vs_start_estimator(struct netns_ipvs *ipvs,
			  struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct netns_ipvs *ipvs,
			  struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst,
			  struct ip_vs_stats *stats);

/* Estimator kthread control */
void ip_vs_est_reload_start(struct netns_ipvs *ipvs);
int ip_vs_est_kthread_start(struct netns_ipvs *ipvs,
			    struct ip_vs_est_kt_data *kd);
void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd);
1604 
/* Recompute ipvs->est_stopped; does nothing without CONFIG_SYSCTL */
static inline void
ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	/* Stop tasks while cpulist is empty or if disabled with flag */
	bool stop = !sysctl_run_estimation(ipvs);

	if (!stop && ipvs->est_cpulist_valid)
		stop = cpumask_empty(sysctl_est_cpulist(ipvs));

	ipvs->est_stopped = stop;
#endif
}
1614 
/* Report ipvs->est_stopped; always false without CONFIG_SYSCTL */
static inline bool
ip_vs_est_stopped(struct netns_ipvs *ipvs)
{
#ifndef CONFIG_SYSCTL
	return false;
#else
	return ipvs->est_stopped;
#endif
}
1623 
1624 static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs)
1625 {
1626 	unsigned int limit = IPVS_EST_CPU_KTHREADS *
1627 			     cpumask_weight(sysctl_est_cpulist(ipvs));
1628 
1629 	return max(1U, limit);
1630 }
1631 
/* Various IPVS packet transmitters (from ip_vs_xmit.c) */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, int offset,
		    unsigned int hooknum, struct ip_vs_iphdr *iph);

void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
1647 
#ifdef CONFIG_IP_VS_IPV6
/* IPv6 variants of the packet transmitters */
int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		       struct ip_vs_protocol *pp, int offset,
		       unsigned int hooknum, struct ip_vs_iphdr *iph);
#endif /* CONFIG_IP_VS_IPV6 */
1661 
#ifdef CONFIG_SYSCTL
/* Simple load-shedding helper: with ipvs->drop_rate set to 'n', about one
 * call in 'n' returns 1 (drop), the others return 0. A drop_rate of 0
 * disables dropping.
 */
static inline int
ip_vs_todrop(struct netns_ipvs *ipvs)
{
	if (ipvs->drop_rate == 0)
		return 0;

	ipvs->drop_counter--;
	if (ipvs->drop_counter > 0)
		return 0;

	/* counter ran out: reload it and tell the caller to drop */
	ipvs->drop_counter = ipvs->drop_rate;
	return 1;
}
#else
/* Without CONFIG_SYSCTL nothing is ever dropped */
static inline int
ip_vs_todrop(struct netns_ipvs *ipvs)
{
	return 0;
}
#endif
1679 
#ifdef CONFIG_SYSCTL
/* Queue the delayed work that expires connections without destination.
 * The work is queued on system_long_wq with a delay of 1, and only when
 * sysctl_expire_nodest_conn() is non-zero.
 */
static inline void
ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs)
{
	if (!sysctl_expire_nodest_conn(ipvs))
		return;

	queue_delayed_work(system_long_wq, &ipvs->expire_nodest_conn_work, 1);
}

void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs);
#else
/* Without CONFIG_SYSCTL there is no such work to queue */
static inline void
ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs)
{
}
#endif
1695 
/* Forwarding method bits of a destination (atomic conn_flags) */
#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \
				 IP_VS_CONN_F_FWD_MASK)

/* Forwarding method bits of a connection; ip_vs_fwd_tag() maps them to a
 * one-character tag. The argument is parenthesized so that expressions
 * such as "cp + 1" or "&conn" expand correctly.
 */
#define IP_VS_FWD_METHOD(cp)  ((cp)->flags & IP_VS_CONN_F_FWD_MASK)
1701 
1702 static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
1703 {
1704 	char fwd;
1705 
1706 	switch (IP_VS_FWD_METHOD(cp)) {
1707 	case IP_VS_CONN_F_MASQ:
1708 		fwd = 'M'; break;
1709 	case IP_VS_CONN_F_LOCALNODE:
1710 		fwd = 'L'; break;
1711 	case IP_VS_CONN_F_TUNNEL:
1712 		fwd = 'T'; break;
1713 	case IP_VS_CONN_F_DROUTE:
1714 		fwd = 'R'; break;
1715 	case IP_VS_CONN_F_BYPASS:
1716 		fwd = 'B'; break;
1717 	default:
1718 		fwd = '?'; break;
1719 	}
1720 	return fwd;
1721 }
1722 
1723 void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
1724 		    struct ip_vs_conn *cp, int dir);
1725 
1726 #ifdef CONFIG_IP_VS_IPV6
1727 void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
1728 		       struct ip_vs_conn *cp, int dir);
1729 #endif
1730 
1731 __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
1732 
1733 static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
1734 {
1735 	__be32 diff[2] = { ~old, new };
1736 
1737 	return csum_partial(diff, sizeof(diff), oldsum);
1738 }
1739 
#ifdef CONFIG_IP_VS_IPV6
/* 128-bit variant: @old and @new point at four 32-bit words each. The
 * buffer holds the inverted old words followed by the new words, each
 * group in reverse word order, and is folded into @oldsum.
 */
static inline __wsum
ip_vs_check_diff16(const __be32 *old, const __be32 *new, __wsum oldsum)
{
	__be32 diff[8];
	int i;

	for (i = 0; i < 4; i++) {
		diff[i] = ~old[3 - i];
		diff[4 + i] = new[3 - i];
	}

	return csum_partial(diff, sizeof(diff), oldsum);
}
#endif
1750 
1751 static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
1752 {
1753 	__be16 diff[2] = { ~old, new };
1754 
1755 	return csum_partial(diff, sizeof(diff), oldsum);
1756 }
1757 
/* Forget current conntrack (unconfirmed) and attach notrack entry.
 * Compiles to nothing when conntrack is neither built in nor a module.
 */
static inline void
ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	if (!ct)
		return;

	/* drop the conntrack reference, then mark the skb untracked */
	nf_conntrack_put(&ct->ct_general);
	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
#endif
}
1771 
1772 #ifdef CONFIG_IP_VS_NFCT
/* Netfilter connection tracking (from ip_vs_nfct.c) */

/* Value of ipvs->sysctl_conntrack; 0 when built without CONFIG_SYSCTL */
static inline int
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
#ifndef CONFIG_SYSCTL
	return 0;
#else
	return ipvs->sysctl_conntrack;
#endif
}
1784 
1785 void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
1786 			    int outin);
1787 int ip_vs_confirm_conntrack(struct sk_buff *skb);
1788 void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
1789 			       struct ip_vs_conn *cp, u_int8_t proto,
1790 			       const __be16 port, int from_rs);
1791 void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1792 
1793 #else
1794 
/* !CONFIG_IP_VS_NFCT: conntrack support is never enabled */
static inline int
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	return 0;
}
1799 
/* !CONFIG_IP_VS_NFCT: nothing to update */
static inline void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
{
}
1804 
1805 static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
1806 {
1807 	return NF_ACCEPT;
1808 }
1809 
/* !CONFIG_IP_VS_NFCT: nothing to drop */
static inline void
ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
{
}
1813 #endif /* CONFIG_IP_VS_NFCT */
1814 
/* Using old conntrack that can not be redirected to another real server?
 * True when the skb has a conntrack entry that is already confirmed;
 * always false without CONFIG_IP_VS_NFCT.
 */
static inline bool
ip_vs_conn_uses_old_conntrack(struct ip_vs_conn *cp, struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_NFCT
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	return ct && nf_ct_is_confirmed(ct);
#else
	return false;
#endif
}
1829 
1830 static inline int ip_vs_register_conntrack(struct ip_vs_service *svc)
1831 {
1832 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1833 	int afmask = (svc->af == AF_INET6) ? 2 : 1;
1834 	int ret = 0;
1835 
1836 	if (!(svc->conntrack_afmask & afmask)) {
1837 		ret = nf_ct_netns_get(svc->ipvs->net, svc->af);
1838 		if (ret >= 0)
1839 			svc->conntrack_afmask |= afmask;
1840 	}
1841 	return ret;
1842 #else
1843 	return 0;
1844 #endif
1845 }
1846 
1847 static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc)
1848 {
1849 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1850 	int afmask = (svc->af == AF_INET6) ? 2 : 1;
1851 
1852 	if (svc->conntrack_afmask & afmask) {
1853 		nf_ct_netns_put(svc->ipvs->net, svc->af);
1854 		svc->conntrack_afmask &= ~afmask;
1855 	}
1856 #endif
1857 }
1858 
/* Hook registration for one address family */
int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af);
void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af);
1861 
1862 static inline int
1863 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
1864 {
1865 	/* We think the overhead of processing active connections is 256
1866 	 * times higher than that of inactive connections in average. (This
1867 	 * 256 times might not be accurate, we will change it later) We
1868 	 * use the following formula to estimate the overhead now:
1869 	 *		  dest->activeconns*256 + dest->inactconns
1870 	 */
1871 	return (atomic_read(&dest->activeconns) << 8) +
1872 		atomic_read(&dest->inactconns);
1873 }
1874 
/* SNAT handlers declared through INDIRECT_CALLABLE_DECLARE(), one per
 * enabled protocol.
 */
#ifdef CONFIG_IP_VS_PROTO_TCP
INDIRECT_CALLABLE_DECLARE(int
	tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
			 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
#endif /* CONFIG_IP_VS_PROTO_TCP */

#ifdef CONFIG_IP_VS_PROTO_UDP
INDIRECT_CALLABLE_DECLARE(int
	udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
			 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
#endif /* CONFIG_IP_VS_PROTO_UDP */
1886 #endif	/* _NET_IP_VS_H */
1887