// SPDX-License-Identifier: GPL-2.0-only
/*
 * x_tables core - Backend for {ip,ip6,arp}_tables
 *
 * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
 * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
 *
 * Based on existing ip_tables code which is
 *   Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 *   Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_arp/arp_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");

#define XT_PCPU_BLOCK_SIZE 4096
#define XT_MAX_TABLE_SIZE	(512 * 1024 * 1024)

struct xt_pernet {
	struct list_head tables[NFPROTO_NUMPROTO];
};

struct compat_delta {
	unsigned int offset; /* offset in kernel */
	int delta; /* delta in 32bit user land */
};

struct xt_af {
	struct mutex mutex;
	struct list_head match;
	struct list_head target;
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
	struct mutex compat_mutex;
	struct compat_delta *compat_tab;
	unsigned int number; /* number of slots in compat_tab[] */
	unsigned int cur; /* number of used slots in compat_tab[] */
#endif
};

static unsigned int xt_pernet_id __read_mostly;
static struct xt_af *xt __read_mostly;

static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
	[NFPROTO_UNSPEC] = "x",
	[NFPROTO_IPV4]   = "ip",
	[NFPROTO_ARP]    = "arp",
	[NFPROTO_BRIDGE] = "eb",
	[NFPROTO_IPV6]   = "ip6",
};

/* Registration hooks for targets. */
int xt_register_target(struct xt_target *target)
{
	u_int8_t af = target->family;

	mutex_lock(&xt[af].mutex);
	list_add(&target->list, &xt[af].target);
	mutex_unlock(&xt[af].mutex);
	return 0;
}
EXPORT_SYMBOL(xt_register_target);

void
xt_unregister_target(struct xt_target *target)
{
	u_int8_t af = target->family;

	mutex_lock(&xt[af].mutex);
	list_del(&target->list);
	mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_target);

int
xt_register_targets(struct xt_target *target, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = xt_register_target(&target[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		xt_unregister_targets(target, i);
	return err;
}
EXPORT_SYMBOL(xt_register_targets);

void
xt_unregister_targets(struct xt_target *target, unsigned int n)
{
	while (n-- > 0)
		xt_unregister_target(&target[n]);
}
EXPORT_SYMBOL(xt_unregister_targets);
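
/*
 * Example (hypothetical, for illustration only; "demo_tg_reg" is not a
 * real extension): a module providing several targets would register
 * them as an array and rely on the partial-failure unwind in
 * xt_register_targets() above:
 *
 *	static struct xt_target demo_tg_reg[] __read_mostly = {
 *		{ .name = "DEMO", .revision = 0, .family = NFPROTO_IPV4 },
 *		{ .name = "DEMO", .revision = 0, .family = NFPROTO_IPV6 },
 *	};
 *
 *	static int __init demo_tg_init(void)
 *	{
 *		return xt_register_targets(demo_tg_reg,
 *					   ARRAY_SIZE(demo_tg_reg));
 *	}
 */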

int xt_register_match(struct xt_match *match)
{
	u_int8_t af = match->family;

	mutex_lock(&xt[af].mutex);
	list_add(&match->list, &xt[af].match);
	mutex_unlock(&xt[af].mutex);
	return 0;
}
EXPORT_SYMBOL(xt_register_match);

void
xt_unregister_match(struct xt_match *match)
{
	u_int8_t af = match->family;

	mutex_lock(&xt[af].mutex);
	list_del(&match->list);
	mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_match);

int
xt_register_matches(struct xt_match *match, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = xt_register_match(&match[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		xt_unregister_matches(match, i);
	return err;
}
EXPORT_SYMBOL(xt_register_matches);

void
xt_unregister_matches(struct xt_match *match, unsigned int n)
{
	while (n-- > 0)
		xt_unregister_match(&match[n]);
}
EXPORT_SYMBOL(xt_unregister_matches);


/*
 * These are weird: module loading must not be done while the mutex is
 * held (the freshly loaded module will try to register itself), and we
 * have to have a single function to use.
 */

/* Find match, grabs ref.  Returns ERR_PTR() on error. */
struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
{
	struct xt_match *m;
	int err = -ENOENT;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(m, &xt[af].match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision == revision) {
				if (try_module_get(m->me)) {
					mutex_unlock(&xt[af].mutex);
					return m;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	mutex_unlock(&xt[af].mutex);

	if (af != NFPROTO_UNSPEC)
		/* Try searching again in the family-independent list */
		return xt_find_match(NFPROTO_UNSPEC, name, revision);

	return ERR_PTR(err);
}
EXPORT_SYMBOL(xt_find_match);

struct xt_match *
xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
{
	struct xt_match *match;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	match = xt_find_match(nfproto, name, revision);
	if (IS_ERR(match)) {
		request_module("%st_%s", xt_prefix[nfproto], name);
		match = xt_find_match(nfproto, name, revision);
	}

	return match;
}
EXPORT_SYMBOL_GPL(xt_request_find_match);
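
/*
 * Example (hypothetical sketch): a ruleset parser resolving a match by
 * name and revision.  On a miss, xt_request_find_match() asks modprobe
 * for "ipt_<name>" (the prefix depends on the family) and retries the
 * lookup, so the caller only needs one call:
 *
 *	struct xt_match *m;
 *
 *	m = xt_request_find_match(NFPROTO_IPV4, "conntrack", 3);
 *	if (IS_ERR(m))
 *		return PTR_ERR(m);
 *
 * The lookup took a module reference; drop it with module_put(m->me)
 * once the match is no longer used.
 */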

/* Find target, grabs ref.  Returns ERR_PTR() on error. */
static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
{
	struct xt_target *t;
	int err = -ENOENT;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &xt[af].target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision == revision) {
				if (try_module_get(t->me)) {
					mutex_unlock(&xt[af].mutex);
					return t;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	mutex_unlock(&xt[af].mutex);

	if (af != NFPROTO_UNSPEC)
		/* Try searching again in the family-independent list */
		return xt_find_target(NFPROTO_UNSPEC, name, revision);

	return ERR_PTR(err);
}

struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
{
	struct xt_target *target;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	target = xt_find_target(af, name, revision);
	if (IS_ERR(target)) {
		request_module("%st_%s", xt_prefix[af], name);
		target = xt_find_target(af, name, revision);
	}

	return target;
}
EXPORT_SYMBOL_GPL(xt_request_find_target);


static int xt_obj_to_user(u16 __user *psize, u16 size,
			  void __user *pname, const char *name,
			  u8 __user *prev, u8 rev)
{
	if (put_user(size, psize))
		return -EFAULT;
	if (copy_to_user(pname, name, strlen(name) + 1))
		return -EFAULT;
	if (put_user(rev, prev))
		return -EFAULT;

	return 0;
}

#define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE)				\
	xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size,	\
		       U->u.user.name, K->u.kernel.TYPE->name,		\
		       &U->u.user.revision, K->u.kernel.TYPE->revision)

int xt_data_to_user(void __user *dst, const void *src,
		    int usersize, int size, int aligned_size)
{
	usersize = usersize ? : size;
	if (copy_to_user(dst, src, usersize))
		return -EFAULT;
	if (usersize != aligned_size &&
	    clear_user(dst + usersize, aligned_size - usersize))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL_GPL(xt_data_to_user);

#define XT_DATA_TO_USER(U, K, TYPE)					\
	xt_data_to_user(U->data, K->data,				\
			K->u.kernel.TYPE->usersize,			\
			K->u.kernel.TYPE->TYPE##size,			\
			XT_ALIGN(K->u.kernel.TYPE->TYPE##size))

int xt_match_to_user(const struct xt_entry_match *m,
		     struct xt_entry_match __user *u)
{
	return XT_OBJ_TO_USER(u, m, match, 0) ||
	       XT_DATA_TO_USER(u, m, match);
}
EXPORT_SYMBOL_GPL(xt_match_to_user);

int xt_target_to_user(const struct xt_entry_target *t,
		      struct xt_entry_target __user *u)
{
	return XT_OBJ_TO_USER(u, t, target, 0) ||
	       XT_DATA_TO_USER(u, t, target);
}
EXPORT_SYMBOL_GPL(xt_target_to_user);

static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
	const struct xt_match *m;
	int have_rev = 0;

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(m, &xt[af].match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision > *bestp)
				*bestp = m->revision;
			if (m->revision == revision)
				have_rev = 1;
		}
	}
	mutex_unlock(&xt[af].mutex);

	if (af != NFPROTO_UNSPEC && !have_rev)
		return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);

	return have_rev;
}

static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
	const struct xt_target *t;
	int have_rev = 0;

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &xt[af].target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision > *bestp)
				*bestp = t->revision;
			if (t->revision == revision)
				have_rev = 1;
		}
	}
	mutex_unlock(&xt[af].mutex);

	if (af != NFPROTO_UNSPEC && !have_rev)
		return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);

	return have_rev;
}

/* Returns true or false (if no such extension at all) */
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
		     int *err)
{
	int have_rev, best = -1;

	if (target == 1)
		have_rev = target_revfn(af, name, revision, &best);
	else
		have_rev = match_revfn(af, name, revision, &best);

	/* Nothing at all?  Return 0 to try loading module. */
	if (best == -1) {
		*err = -ENOENT;
		return 0;
	}

	*err = best;
	if (!have_rev)
		*err = -EPROTONOSUPPORT;
	return 1;
}
EXPORT_SYMBOL_GPL(xt_find_revision);
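
/*
 * Example (hypothetical sketch): the revision-probing getsockopt
 * handlers use this to answer "is revision N of extension X known, and
 * if not, which is the best one?".  target is 1 for targets, 0 for
 * matches:
 *
 *	int ret;
 *
 *	if (xt_find_revision(NFPROTO_IPV4, name, rev, 0, &ret))
 *		return ret;
 *
 * A return of 1 means ret holds the best revision (or -EPROTONOSUPPORT
 * if the requested one is absent); a return of 0 with ret == -ENOENT
 * means nothing was found and the caller may try loading a module.
 */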

static char *
textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
{
	static const char *const inetbr_names[] = {
		"PREROUTING", "INPUT", "FORWARD",
		"OUTPUT", "POSTROUTING", "BROUTING",
	};
	static const char *const arp_names[] = {
		"INPUT", "FORWARD", "OUTPUT",
	};
	const char *const *names;
	unsigned int i, max;
	char *p = buf;
	bool np = false;
	int res;

	names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
	max   = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
	                                   ARRAY_SIZE(inetbr_names);
	*p = '\0';
	for (i = 0; i < max; ++i) {
		if (!(mask & (1 << i)))
			continue;
		res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
		if (res > 0) {
			size -= res;
			p += res;
		}
		np = true;
	}

	return buf;
}

/**
 * xt_check_proc_name - check that name is suitable for /proc file creation
 *
 * @name: file name candidate
 * @size: length of buffer
 *
 * Some x_tables modules wish to create a file in /proc.
 * This function makes sure that the name is suitable for this
 * purpose: it checks that the name is NUL terminated and isn't a
 * 'special' name, like "..".
 *
 * Returns a negative number on error or 0 if the name is usable.
 */
int xt_check_proc_name(const char *name, unsigned int size)
{
	if (name[0] == '\0')
		return -EINVAL;

	if (strnlen(name, size) == size)
		return -ENAMETOOLONG;

	if (strcmp(name, ".") == 0 ||
	    strcmp(name, "..") == 0 ||
	    strchr(name, '/'))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL(xt_check_proc_name);
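
/*
 * Example (hypothetical sketch): extensions that create /proc files,
 * e.g. a recent/hashlimit style match, vet the user-supplied entry
 * name in their checkentry hook before calling proc_create():
 *
 *	ret = xt_check_proc_name(info->name, sizeof(info->name));
 *	if (ret < 0)
 *		return ret;
 */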

int xt_check_match(struct xt_mtchk_param *par,
		   unsigned int size, u16 proto, bool inv_proto)
{
	int ret;

	if (XT_ALIGN(par->match->matchsize) != size &&
	    par->match->matchsize != -1) {
		/*
		 * ebt_among is exempt from centralized matchsize checking
		 * because it uses a dynamic-size data set.
		 */
		pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n",
				   xt_prefix[par->family], par->match->name,
				   par->match->revision,
				   XT_ALIGN(par->match->matchsize), size);
		return -EINVAL;
	}
	if (par->match->table != NULL &&
	    strcmp(par->match->table, par->table) != 0) {
		pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n",
				    xt_prefix[par->family], par->match->name,
				    par->match->table, par->table);
		return -EINVAL;
	}
	if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
		char used[64], allow[64];

		pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n",
				    xt_prefix[par->family], par->match->name,
				    textify_hooks(used, sizeof(used),
						  par->hook_mask, par->family),
				    textify_hooks(allow, sizeof(allow),
						  par->match->hooks,
						  par->family));
		return -EINVAL;
	}
	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
		pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n",
				    xt_prefix[par->family], par->match->name,
				    par->match->proto);
		return -EINVAL;
	}
	if (par->match->checkentry != NULL) {
		ret = par->match->checkentry(par);
		if (ret < 0)
			return ret;
		else if (ret > 0)
			/* Flag up potential errors. */
			return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(xt_check_match);

/** xt_check_entry_match - check that matches end before start of target
 *
 * @match: beginning of xt_entry_match
 * @target: beginning of this rule's target (alleged end of matches)
 * @alignment: alignment requirement of match structures
 *
 * Validates that all matches add up to the beginning of the target,
 * and that each match covers at least the base structure size.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int xt_check_entry_match(const char *match, const char *target,
				const size_t alignment)
{
	const struct xt_entry_match *pos;
	int length = target - match;

	if (length == 0) /* no matches */
		return 0;

	pos = (struct xt_entry_match *)match;
	do {
		if ((unsigned long)pos % alignment)
			return -EINVAL;

		if (length < (int)sizeof(struct xt_entry_match))
			return -EINVAL;

		if (pos->u.match_size < sizeof(struct xt_entry_match))
			return -EINVAL;

		if (pos->u.match_size > length)
			return -EINVAL;

		length -= pos->u.match_size;
		pos = ((void *)((char *)(pos) + (pos)->u.match_size));
	} while (length > 0);

	return 0;
}

/** xt_check_table_hooks - check hook entry points are sane
 *
 * @info: xt_table_info to check
 * @valid_hooks: hook entry points that we can enter from
 *
 * Validates that the hook entry and underflow points are set up.
 *
 * Return: 0 on success, negative errno on failure.
 */
int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks)
{
	const char *err = "unsorted underflow";
	unsigned int i, max_uflow, max_entry;
	bool check_hooks = false;

	BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow));

	max_entry = 0;
	max_uflow = 0;

	for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) {
		if (!(valid_hooks & (1 << i)))
			continue;

		if (info->hook_entry[i] == 0xFFFFFFFF)
			return -EINVAL;
		if (info->underflow[i] == 0xFFFFFFFF)
			return -EINVAL;

		if (check_hooks) {
			if (max_uflow > info->underflow[i])
				goto error;

			if (max_uflow == info->underflow[i]) {
				err = "duplicate underflow";
				goto error;
			}
			if (max_entry > info->hook_entry[i]) {
				err = "unsorted entry";
				goto error;
			}
			if (max_entry == info->hook_entry[i]) {
				err = "duplicate entry";
				goto error;
			}
		}
		max_entry = info->hook_entry[i];
		max_uflow = info->underflow[i];
		check_hooks = true;
	}

	return 0;
error:
	pr_err_ratelimited("%s at hook %d\n", err, i);
	return -EINVAL;
}
EXPORT_SYMBOL(xt_check_table_hooks);

static bool verdict_ok(int verdict)
{
	if (verdict > 0)
		return true;

	if (verdict < 0) {
		int v = -verdict - 1;

		if (verdict == XT_RETURN)
			return true;

		switch (v) {
		case NF_ACCEPT: return true;
		case NF_DROP: return true;
		case NF_QUEUE: return true;
		default:
			break;
		}

		return false;
	}

	return false;
}
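
/*
 * Verdict encoding, for reference: a positive verdict is a jump offset
 * into the rule blob, and a negative verdict v encodes the netfilter
 * verdict -v - 1, so -1 is NF_DROP (0), -2 is NF_ACCEPT (1) and -4 is
 * NF_QUEUE (3).  XT_RETURN (-NF_REPEAT - 1) pops back to the calling
 * chain.  verdict_ok() rejects everything else, including 0 and the
 * NF_STOLEN encoding, neither of which is a valid standard-target
 * verdict.
 */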

static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
			const char *msg, unsigned int msglen)
{
	return usersize == kernsize && strnlen(msg, msglen) < msglen;
}

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
	struct xt_af *xp = &xt[af];

	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));

	if (WARN_ON(!xp->compat_tab))
		return -ENOMEM;

	if (xp->cur >= xp->number)
		return -EINVAL;

	if (xp->cur)
		delta += xp->compat_tab[xp->cur - 1].delta;
	xp->compat_tab[xp->cur].offset = offset;
	xp->compat_tab[xp->cur].delta = delta;
	xp->cur++;
	return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_add_offset);

void xt_compat_flush_offsets(u_int8_t af)
{
	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));

	if (xt[af].compat_tab) {
		vfree(xt[af].compat_tab);
		xt[af].compat_tab = NULL;
		xt[af].number = 0;
		xt[af].cur = 0;
	}
}
EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);

int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
{
	struct compat_delta *tmp = xt[af].compat_tab;
	int mid, left = 0, right = xt[af].cur - 1;

	while (left <= right) {
		mid = (left + right) >> 1;
		if (offset > tmp[mid].offset)
			left = mid + 1;
		else if (offset < tmp[mid].offset)
			right = mid - 1;
		else
			return mid ? tmp[mid - 1].delta : 0;
	}
	return left ? tmp[left - 1].delta : 0;
}
EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
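
/*
 * Illustration (hypothetical numbers): with compat_tab holding the
 * cumulative deltas { {.offset = 100, .delta = 4},
 * {.offset = 200, .delta = 12} }, a native offset of 150 lies beyond
 * the first recorded boundary only, so xt_compat_calc_jump() returns 4,
 * while an offset of 250 returns 12.  Callers use this running delta
 * to translate rule offsets between the native and 32-bit compat
 * layouts.
 */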

int xt_compat_init_offsets(u8 af, unsigned int number)
{
	size_t mem;

	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));

	if (!number || number > (INT_MAX / sizeof(struct compat_delta)))
		return -EINVAL;

	if (WARN_ON(xt[af].compat_tab))
		return -EINVAL;

	mem = sizeof(struct compat_delta) * number;
	if (mem > XT_MAX_TABLE_SIZE)
		return -ENOMEM;

	xt[af].compat_tab = vmalloc(mem);
	if (!xt[af].compat_tab)
		return -ENOMEM;

	xt[af].number = number;
	xt[af].cur = 0;

	return 0;
}
EXPORT_SYMBOL(xt_compat_init_offsets);

int xt_compat_match_offset(const struct xt_match *match)
{
	u_int16_t csize = match->compatsize ? : match->matchsize;
	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);

void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
			       unsigned int *size)
{
	const struct xt_match *match = m->u.kernel.match;
	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
	int off = xt_compat_match_offset(match);
	u_int16_t msize = cm->u.user.match_size;
	char name[sizeof(m->u.user.name)];

	m = *dstptr;
	memcpy(m, cm, sizeof(*cm));
	if (match->compat_from_user)
		match->compat_from_user(m->data, cm->data);
	else
		memcpy(m->data, cm->data, msize - sizeof(*cm));

	msize += off;
	m->u.user.match_size = msize;
	strlcpy(name, match->name, sizeof(name));
	module_put(match->me);
	strncpy(m->u.user.name, name, sizeof(m->u.user.name));

	*size += off;
	*dstptr += msize;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);

#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE)			\
	xt_data_to_user(U->data, K->data,				\
			K->u.kernel.TYPE->usersize,			\
			C_SIZE,						\
			COMPAT_XT_ALIGN(C_SIZE))

int xt_compat_match_to_user(const struct xt_entry_match *m,
			    void __user **dstptr, unsigned int *size)
{
	const struct xt_match *match = m->u.kernel.match;
	struct compat_xt_entry_match __user *cm = *dstptr;
	int off = xt_compat_match_offset(match);
	u_int16_t msize = m->u.user.match_size - off;

	if (XT_OBJ_TO_USER(cm, m, match, msize))
		return -EFAULT;

	if (match->compat_to_user) {
		if (match->compat_to_user((void __user *)cm->data, m->data))
			return -EFAULT;
	} else {
		if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
			return -EFAULT;
	}

	*size -= off;
	*dstptr += msize;
	return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);

/* non-compat version may have padding after verdict */
struct compat_xt_standard_target {
	struct compat_xt_entry_target t;
	compat_uint_t verdict;
};

struct compat_xt_error_target {
	struct compat_xt_entry_target t;
	char errorname[XT_FUNCTION_MAXNAMELEN];
};

int xt_compat_check_entry_offsets(const void *base, const char *elems,
				  unsigned int target_offset,
				  unsigned int next_offset)
{
	long size_of_base_struct = elems - (const char *)base;
	const struct compat_xt_entry_target *t;
	const char *e = base;

	if (target_offset < size_of_base_struct)
		return -EINVAL;

	if (target_offset + sizeof(*t) > next_offset)
		return -EINVAL;

	t = (void *)(e + target_offset);
	if (t->u.target_size < sizeof(*t))
		return -EINVAL;

	if (target_offset + t->u.target_size > next_offset)
		return -EINVAL;

	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
		const struct compat_xt_standard_target *st = (const void *)t;

		if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
			return -EINVAL;

		if (!verdict_ok(st->verdict))
			return -EINVAL;
	} else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
		const struct compat_xt_error_target *et = (const void *)t;

		if (!error_tg_ok(t->u.target_size, sizeof(*et),
				 et->errorname, sizeof(et->errorname)))
			return -EINVAL;
	}

	/* compat_xt_entry match has less strict alignment requirements,
	 * otherwise they are identical.  In case of padding differences
	 * we would need to add a compat version of xt_check_entry_match.
	 */
	BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));

	return xt_check_entry_match(elems, base + target_offset,
				    __alignof__(struct compat_xt_entry_match));
}
EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */

/**
 * xt_check_entry_offsets - validate arp/ip/ip6t_entry
 *
 * @base: pointer to arp/ip/ip6t_entry
 * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
 * @target_offset: the arp/ip/ip6_t->target_offset
 * @next_offset: the arp/ip/ip6_t->next_offset
 *
 * validates that target_offset and next_offset are sane and that all
 * match sizes (if any) align with the target offset.
 *
 * This function does not validate the targets or matches themselves, it
 * only tests that all the offsets and sizes are correct, that all
 * match structures are aligned, and that the last structure ends where
 * the target structure begins.
 *
 * Also see xt_compat_check_entry_offsets for the
 * CONFIG_NETFILTER_XTABLES_COMPAT version.
 *
 * The arp/ip/ip6t_entry structure @base must have passed the following tests:
 * - it must point to a valid memory location
 * - base to base + next_offset must be accessible, i.e. not exceed allocated
 *   length.
 *
 * A well-formed entry looks like this:
 *
 * ip(6)t_entry   match [mtdata]  match [mtdata] target [tgdata] ip(6)t_entry
 * e->elems[]-----'                              |               |
 *                matchsize                      |               |
 *                                matchsize      |               |
 *                                               |               |
 * target_offset---------------------------------'               |
 * next_offset---------------------------------------------------'
 *
 * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
 *          This is where matches (if any) and the target reside.
 * target_offset: beginning of target.
 * next_offset: start of the next rule; also: size of this rule.
 * Since targets have a minimum size, target_offset + minlen <= next_offset.
 *
 * Every match stores its size; the sum of sizes must not exceed target_offset.
 *
 * Return: 0 on success, negative errno on failure.
 */
int xt_check_entry_offsets(const void *base,
			   const char *elems,
			   unsigned int target_offset,
			   unsigned int next_offset)
{
	long size_of_base_struct = elems - (const char *)base;
	const struct xt_entry_target *t;
	const char *e = base;

	/* target start is within the ip/ip6/arpt_entry struct */
	if (target_offset < size_of_base_struct)
		return -EINVAL;

	if (target_offset + sizeof(*t) > next_offset)
		return -EINVAL;

	t = (void *)(e + target_offset);
	if (t->u.target_size < sizeof(*t))
		return -EINVAL;

	if (target_offset + t->u.target_size > next_offset)
		return -EINVAL;

	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
		const struct xt_standard_target *st = (const void *)t;

		if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
			return -EINVAL;

		if (!verdict_ok(st->verdict))
			return -EINVAL;
	} else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
		const struct xt_error_target *et = (const void *)t;

		if (!error_tg_ok(t->u.target_size, sizeof(*et),
				 et->errorname, sizeof(et->errorname)))
			return -EINVAL;
	}

	return xt_check_entry_match(elems, base + target_offset,
				    __alignof__(struct xt_entry_match));
}
EXPORT_SYMBOL(xt_check_entry_offsets);
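
/*
 * Example caller pattern (sketch of how the per-family table checkers
 * use this, names as in ip_tables.c): each rule in the blob is vetted
 * before its matches and target are looked up:
 *
 *	ret = xt_check_entry_offsets(e, e->elems, e->target_offset,
 *				     e->next_offset);
 *	if (ret)
 *		return ret;
 */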

/**
 * xt_alloc_entry_offsets - allocate array to store rule head offsets
 *
 * @size: number of entries
 *
 * Return: NULL or zeroed kmalloc'd or vmalloc'd array
 */
unsigned int *xt_alloc_entry_offsets(unsigned int size)
{
	if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int))
		return NULL;

	return kvcalloc(size, sizeof(unsigned int), GFP_KERNEL);
}
EXPORT_SYMBOL(xt_alloc_entry_offsets);

/**
 * xt_find_jump_offset - check if target is a valid jump offset
 *
 * @offsets: array containing all valid rule start offsets of a rule blob
 * @target: the jump target to search for
 * @size: number of entries in @offsets
 */
bool xt_find_jump_offset(const unsigned int *offsets,
			 unsigned int target, unsigned int size)
{
	int m, low = 0, hi = size;

	while (hi > low) {
		m = (low + hi) / 2u;

		if (offsets[m] > target)
			hi = m;
		else if (offsets[m] < target)
			low = m + 1;
		else
			return true;
	}

	return false;
}
EXPORT_SYMBOL(xt_find_jump_offset);
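
/*
 * Example (hypothetical sketch): translate_table() style validation
 * first collects all rule start offsets into a sorted array (see
 * xt_alloc_entry_offsets() above) and then checks that every jump
 * verdict lands exactly on a rule boundary:
 *
 *	if (!xt_find_jump_offset(offsets, newpos, newinfo->number))
 *		return -EINVAL;
 */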

int xt_check_target(struct xt_tgchk_param *par,
		    unsigned int size, u16 proto, bool inv_proto)
{
	int ret;

	if (XT_ALIGN(par->target->targetsize) != size) {
		pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
				   xt_prefix[par->family], par->target->name,
				   par->target->revision,
				   XT_ALIGN(par->target->targetsize), size);
		return -EINVAL;
	}
	if (par->target->table != NULL &&
	    strcmp(par->target->table, par->table) != 0) {
		pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n",
				    xt_prefix[par->family], par->target->name,
				    par->target->table, par->table);
		return -EINVAL;
	}
	if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
		char used[64], allow[64];

		pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n",
				    xt_prefix[par->family], par->target->name,
				    textify_hooks(used, sizeof(used),
						  par->hook_mask, par->family),
				    textify_hooks(allow, sizeof(allow),
						  par->target->hooks,
						  par->family));
		return -EINVAL;
	}
	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
		pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n",
				    xt_prefix[par->family], par->target->name,
				    par->target->proto);
		return -EINVAL;
	}
	if (par->target->checkentry != NULL) {
		ret = par->target->checkentry(par);
		if (ret < 0)
			return ret;
		else if (ret > 0)
			/* Flag up potential errors. */
			return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(xt_check_target);

/**
 * xt_copy_counters - copy counters and metadata from a sockptr_t
 *
 * @arg: src sockptr
 * @len: alleged size of userspace memory
 * @info: where to store the xt_counters_info metadata
 *
 * Copies the counter metadata from @arg and stores it in @info.
 *
 * vmallocs memory to hold the counters, then copies the counter data
 * from @arg to the new memory and returns a pointer to it.
 *
 * If called from a compat syscall, @info gets converted automatically to the
 * 64bit representation.
 *
 * Return: returns pointer that caller has to test via IS_ERR().
 * If IS_ERR is false, caller has to vfree the pointer.
 */
void *xt_copy_counters(sockptr_t arg, unsigned int len,
		       struct xt_counters_info *info)
{
	size_t offset;
	void *mem;
	u64 size;

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
	if (in_compat_syscall()) {
		/* structures only differ in size due to alignment */
		struct compat_xt_counters_info compat_tmp;

		if (len <= sizeof(compat_tmp))
			return ERR_PTR(-EINVAL);

		len -= sizeof(compat_tmp);
		if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0)
			return ERR_PTR(-EFAULT);

		memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
		info->num_counters = compat_tmp.num_counters;
		offset = sizeof(compat_tmp);
	} else
#endif
	{
		if (len <= sizeof(*info))
			return ERR_PTR(-EINVAL);

		len -= sizeof(*info);
		if (copy_from_sockptr(info, arg, sizeof(*info)) != 0)
			return ERR_PTR(-EFAULT);

		offset = sizeof(*info);
	}
	info->name[sizeof(info->name) - 1] = '\0';

	size = sizeof(struct xt_counters);
	size *= info->num_counters;

	if (size != (u64)len)
		return ERR_PTR(-EINVAL);

	mem = vmalloc(len);
	if (!mem)
		return ERR_PTR(-ENOMEM);

	if (copy_from_sockptr_offset(mem, arg, offset, len) == 0)
		return mem;

	vfree(mem);
	return ERR_PTR(-EFAULT);
}
EXPORT_SYMBOL_GPL(xt_copy_counters);
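
/*
 * Example caller (sketch mirroring the do_add_counters() pattern in
 * the per-family code):
 *
 *	struct xt_counters_info tmp;
 *	struct xt_counters *paddc;
 *
 *	paddc = xt_copy_counters(arg, len, &tmp);
 *	if (IS_ERR(paddc))
 *		return PTR_ERR(paddc);
 *	...
 *	vfree(paddc);
 */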

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
	u_int16_t csize = target->compatsize ? : target->targetsize;
	return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
}
EXPORT_SYMBOL_GPL(xt_compat_target_offset);

void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
				unsigned int *size)
{
	const struct xt_target *target = t->u.kernel.target;
	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
	int off = xt_compat_target_offset(target);
	u_int16_t tsize = ct->u.user.target_size;
	char name[sizeof(t->u.user.name)];

	t = *dstptr;
	memcpy(t, ct, sizeof(*ct));
	if (target->compat_from_user)
		target->compat_from_user(t->data, ct->data);
	else
		memcpy(t->data, ct->data, tsize - sizeof(*ct));

	tsize += off;
	t->u.user.target_size = tsize;
	strlcpy(name, target->name, sizeof(name));
	module_put(target->me);
	strncpy(t->u.user.name, name, sizeof(t->u.user.name));

	*size += off;
	*dstptr += tsize;
}
EXPORT_SYMBOL_GPL(xt_compat_target_from_user);

int xt_compat_target_to_user(const struct xt_entry_target *t,
			     void __user **dstptr, unsigned int *size)
{
	const struct xt_target *target = t->u.kernel.target;
	struct compat_xt_entry_target __user *ct = *dstptr;
	int off = xt_compat_target_offset(target);
	u_int16_t tsize = t->u.user.target_size - off;

	if (XT_OBJ_TO_USER(ct, t, target, tsize))
		return -EFAULT;

	if (target->compat_to_user) {
		if (target->compat_to_user((void __user *)ct->data, t->data))
			return -EFAULT;
	} else {
		if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
			return -EFAULT;
	}

	*size -= off;
	*dstptr += tsize;
	return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif

struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
	struct xt_table_info *info = NULL;
	size_t sz = sizeof(*info) + size;

	if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)
		return NULL;

	info = kvmalloc(sz, GFP_KERNEL_ACCOUNT);
	if (!info)
		return NULL;

	memset(info, 0, sizeof(*info));
	info->size = size;
	return info;
}
EXPORT_SYMBOL(xt_alloc_table_info);

void xt_free_table_info(struct xt_table_info *info)
{
	int cpu;

	if (info->jumpstack != NULL) {
		for_each_possible_cpu(cpu)
			kvfree(info->jumpstack[cpu]);
		kvfree(info->jumpstack);
	}

	kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);

struct xt_table *xt_find_table(struct net *net, u8 af, const char *name)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
	struct xt_table *t;

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &xt_net->tables[af], list) {
		if (strcmp(t->name, name) == 0) {
			mutex_unlock(&xt[af].mutex);
			return t;
		}
	}
	mutex_unlock(&xt[af].mutex);
	return NULL;
}
EXPORT_SYMBOL(xt_find_table);

/* Find table by name, grabs mutex & ref.  Returns ERR_PTR on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
				    const char *name)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
	struct xt_table *t, *found = NULL;

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &xt_net->tables[af], list)
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;

	if (net == &init_net)
		goto out;

	/* Table doesn't exist in this netns, re-try init */
	xt_net = net_generic(&init_net, xt_pernet_id);
	list_for_each_entry(t, &xt_net->tables[af], list) {
		int err;

		if (strcmp(t->name, name))
			continue;
		if (!try_module_get(t->me))
			goto out;
		mutex_unlock(&xt[af].mutex);
		err = t->table_init(net);
		if (err < 0) {
			module_put(t->me);
			return ERR_PTR(err);
		}

		found = t;

		mutex_lock(&xt[af].mutex);
		break;
	}

	if (!found)
		goto out;

	xt_net = net_generic(net, xt_pernet_id);
	/* and once again: */
	list_for_each_entry(t, &xt_net->tables[af], list)
		if (strcmp(t->name, name) == 0)
			return t;

	module_put(found->me);
 out:
	mutex_unlock(&xt[af].mutex);
	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(xt_find_table_lock);

struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
					    const char *name)
{
	struct xt_table *t = xt_find_table_lock(net, af, name);

#ifdef CONFIG_MODULES
	if (IS_ERR(t)) {
		int err = request_module("%stable_%s", xt_prefix[af], name);
		if (err < 0)
			return ERR_PTR(err);
		t = xt_find_table_lock(net, af, name);
	}
#endif

	return t;
}
EXPORT_SYMBOL_GPL(xt_request_find_table_lock);

void xt_table_unlock(struct xt_table *table)
{
	mutex_unlock(&xt[table->af].mutex);
}
EXPORT_SYMBOL_GPL(xt_table_unlock);

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
void xt_compat_lock(u_int8_t af)
{
	mutex_lock(&xt[af].compat_mutex);
}
EXPORT_SYMBOL_GPL(xt_compat_lock);

void xt_compat_unlock(u_int8_t af)
{
	mutex_unlock(&xt[af].compat_mutex);
}
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif

DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);

struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);

static int xt_jumpstack_alloc(struct xt_table_info *i)
{
	unsigned int size;
	int cpu;

	size = sizeof(void **) * nr_cpu_ids;
	if (size > PAGE_SIZE)
		i->jumpstack = kvzalloc(size, GFP_KERNEL);
	else
		i->jumpstack = kzalloc(size, GFP_KERNEL);
	if (i->jumpstack == NULL)
		return -ENOMEM;

	/* ruleset without jumps -- no stack needed */
	if (i->stacksize == 0)
		return 0;

	/* Jumpstack needs to be able to record two full callchains, one
	 * from the first rule set traversal, plus one table reentrancy
	 * via -j TEE without clobbering the callchain that brought us to
	 * the TEE target.
	 *
	 * This is done by allocating two jumpstacks per cpu; on reentry
	 * the upper half of the stack is used.
	 *
	 * see the jumpstack setup in ipt_do_table() for more details.
	 */
	size = sizeof(void *) * i->stacksize * 2u;
	for_each_possible_cpu(cpu) {
		i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL,
			cpu_to_node(cpu));
		if (i->jumpstack[cpu] == NULL)
			/*
			 * Freeing will be done later on by the callers. The
			 * chain is: xt_replace_table -> __do_replace ->
			 * do_replace -> xt_free_table_info.
			 */
			return -ENOMEM;
	}

	return 0;
}

struct xt_counters *xt_counters_alloc(unsigned int counters)
{
	struct xt_counters *mem;

	if (counters == 0 || counters > INT_MAX / sizeof(*mem))
		return NULL;

	counters *= sizeof(*mem);
	if (counters > XT_MAX_TABLE_SIZE)
		return NULL;

	return vzalloc(counters);
}
EXPORT_SYMBOL(xt_counters_alloc);

struct xt_table_info *
xt_replace_table(struct xt_table *table,
	      unsigned int num_counters,
	      struct xt_table_info *newinfo,
	      int *error)
{
	struct xt_table_info *private;
	unsigned int cpu;
	int ret;

	ret = xt_jumpstack_alloc(newinfo);
	if (ret < 0) {
		*error = ret;
		return NULL;
	}

	/* Do the substitution. */
	local_bh_disable();
	private = table->private;

	/* Check inside lock: is the old number correct? */
	if (num_counters != private->number) {
		pr_debug("num_counters != table->private->number (%u/%u)\n",
			 num_counters, private->number);
		local_bh_enable();
		*error = -EAGAIN;
		return NULL;
	}

	newinfo->initial_entries = private->initial_entries;
	/*
	 * Ensure contents of newinfo are visible before assigning to
	 * private.
	 */
	smp_wmb();
	table->private = newinfo;

	/* make sure all cpus see new ->private value */
	smp_mb();

	/*
	 * Even though table entries have now been swapped, other CPUs
	 * may still be using the old entries...
	 */
	local_bh_enable();

	/* ... so wait until xt_recseq is even (no in-flight traversal) on
	 * all CPUs
	 */
	for_each_possible_cpu(cpu) {
		seqcount_t *s = &per_cpu(xt_recseq, cpu);
		u32 seq = raw_read_seqcount(s);

		if (seq & 1) {
			do {
				cond_resched();
				cpu_relax();
			} while (seq == raw_read_seqcount(s));
		}
	}

	audit_log_nfcfg(table->name, table->af, private->number,
			!private->number ? AUDIT_XT_OP_REGISTER :
					   AUDIT_XT_OP_REPLACE,
			GFP_KERNEL);
	return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
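
/*
 * Example caller pattern (sketch following __do_replace() in the
 * per-family code): the old private returned above is what the caller
 * drains the counters from and finally frees:
 *
 *	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
 *	if (!oldinfo)
 *		goto put_module;
 *	get_counters(oldinfo, counters);
 *	...
 *	xt_free_table_info(oldinfo);
 */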

struct xt_table *xt_register_table(struct net *net,
				   const struct xt_table *input_table,
				   struct xt_table_info *bootstrap,
				   struct xt_table_info *newinfo)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
	struct xt_table_info *private;
	struct xt_table *t, *table;
	int ret;

	/* Don't add one object to multiple lists. */
	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
	if (!table) {
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock(&xt[table->af].mutex);
	/* Don't autoload: we'd eat our tail... */
	list_for_each_entry(t, &xt_net->tables[table->af], list) {
		if (strcmp(t->name, table->name) == 0) {
			ret = -EEXIST;
			goto unlock;
		}
	}

	/* Simplifies replace_table code. */
	table->private = bootstrap;

	if (!xt_replace_table(table, 0, newinfo, &ret))
		goto unlock;

	private = table->private;
	pr_debug("table->private->number = %u\n", private->number);

	/* save number of initial entries */
	private->initial_entries = private->number;

	list_add(&table->list, &xt_net->tables[table->af]);
	mutex_unlock(&xt[table->af].mutex);
	return table;

unlock:
	mutex_unlock(&xt[table->af].mutex);
	kfree(table);
out:
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(xt_register_table);

void *xt_unregister_table(struct xt_table *table)
{
	struct xt_table_info *private;

	mutex_lock(&xt[table->af].mutex);
	private = table->private;
	list_del(&table->list);
	mutex_unlock(&xt[table->af].mutex);
	audit_log_nfcfg(table->name, table->af, private->number,
			AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
	kfree(table->ops);
	kfree(table);

	return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);

#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
	u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
	struct net *net = seq_file_net(seq);
	struct xt_pernet *xt_net;

	xt_net = net_generic(net, xt_pernet_id);

	mutex_lock(&xt[af].mutex);
	return seq_list_start(&xt_net->tables[af], *pos);
}

static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
	struct net *net = seq_file_net(seq);
	struct xt_pernet *xt_net;

	xt_net = net_generic(net, xt_pernet_id);

	return seq_list_next(v, &xt_net->tables[af], pos);
}

static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));

	mutex_unlock(&xt[af].mutex);
}

static int xt_table_seq_show(struct seq_file *seq, void *v)
{
	struct xt_table *table = list_entry(v, struct xt_table, list);

	if (*table->name)
		seq_printf(seq, "%s\n", table->name);
	return 0;
}

static const struct seq_operations xt_table_seq_ops = {
	.start	= xt_table_seq_start,
	.next	= xt_table_seq_next,
	.stop	= xt_table_seq_stop,
	.show	= xt_table_seq_show,
};

/*
 * Traversal state for ip{,6}_{tables,matches}, to help with crossing
 * the multi-AF mutexes.
 */
struct nf_mttg_trav {
	struct list_head *head, *curr;
	uint8_t class;
};

enum {
	MTTG_TRAV_INIT,
	MTTG_TRAV_NFP_UNSPEC,
	MTTG_TRAV_NFP_SPEC,
	MTTG_TRAV_DONE,
};

static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
    bool is_target)
{
	static const uint8_t next_class[] = {
		[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
		[MTTG_TRAV_NFP_SPEC]   = MTTG_TRAV_DONE,
	};
	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
	struct nf_mttg_trav *trav = seq->private;

	if (ppos != NULL)
		++(*ppos);

	switch (trav->class) {
	case MTTG_TRAV_INIT:
		trav->class = MTTG_TRAV_NFP_UNSPEC;
		mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
		trav->head = trav->curr = is_target ?
			&xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
		break;
	case MTTG_TRAV_NFP_UNSPEC:
		trav->curr = trav->curr->next;
		if (trav->curr != trav->head)
			break;
		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
		mutex_lock(&xt[nfproto].mutex);
		trav->head = trav->curr = is_target ?
			&xt[nfproto].target : &xt[nfproto].match;
		trav->class = next_class[trav->class];
		break;
	case MTTG_TRAV_NFP_SPEC:
		trav->curr = trav->curr->next;
		if (trav->curr != trav->head)
			break;
		fallthrough;
	default:
		return NULL;
	}
	return trav;
}

static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
    bool is_target)
{
	struct nf_mttg_trav *trav = seq->private;
	unsigned int j;

	trav->class = MTTG_TRAV_INIT;
	for (j = 0; j < *pos; ++j)
		if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
			return NULL;
	return trav;
}

static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
	struct nf_mttg_trav *trav = seq->private;

	switch (trav->class) {
	case MTTG_TRAV_NFP_UNSPEC:
		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
		break;
	case MTTG_TRAV_NFP_SPEC:
		mutex_unlock(&xt[nfproto].mutex);
		break;
	}
}

static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
{
	return xt_mttg_seq_start(seq, pos, false);
}

static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	return xt_mttg_seq_next(seq, v, ppos, false);
}

static int xt_match_seq_show(struct seq_file *seq, void *v)
{
	const struct nf_mttg_trav *trav = seq->private;
	const struct xt_match *match;

	switch (trav->class) {
	case MTTG_TRAV_NFP_UNSPEC:
	case MTTG_TRAV_NFP_SPEC:
		if (trav->curr == trav->head)
			return 0;
		match = list_entry(trav->curr, struct xt_match, list);
		if (*match->name)
			seq_printf(seq, "%s\n", match->name);
	}
	return 0;
}

static const struct seq_operations xt_match_seq_ops = {
	.start	= xt_match_seq_start,
	.next	= xt_match_seq_next,
	.stop	= xt_mttg_seq_stop,
	.show	= xt_match_seq_show,
};

static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
{
	return xt_mttg_seq_start(seq, pos, true);
}

static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	return xt_mttg_seq_next(seq, v, ppos, true);
}

static int xt_target_seq_show(struct seq_file *seq, void *v)
{
	const struct nf_mttg_trav *trav = seq->private;
	const struct xt_target *target;

	switch (trav->class) {
	case MTTG_TRAV_NFP_UNSPEC:
	case MTTG_TRAV_NFP_SPEC:
		if (trav->curr == trav->head)
			return 0;
		target = list_entry(trav->curr, struct xt_target, list);
		if (*target->name)
			seq_printf(seq, "%s\n", target->name);
	}
	return 0;
}

static const struct seq_operations xt_target_seq_ops = {
	.start	= xt_target_seq_start,
	.next	= xt_target_seq_next,
	.stop	= xt_mttg_seq_stop,
	.show	= xt_target_seq_show,
};

#define FORMAT_TABLES	"_tables_names"
#define FORMAT_MATCHES	"_tables_matches"
#define FORMAT_TARGETS	"_tables_targets"

#endif /* CONFIG_PROC_FS */

/**
 * xt_hook_ops_alloc - set up hooks for a new table
 * @table:	table with metadata needed to set up hooks
 * @fn:		Hook function
 *
 * This function will create the nf_hook_ops that the x_table needs
 * to hand to xt_hook_link_net().
 */
struct nf_hook_ops *
xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
{
	unsigned int hook_mask = table->valid_hooks;
	uint8_t i, num_hooks = hweight32(hook_mask);
	uint8_t hooknum;
	struct nf_hook_ops *ops;

	if (!num_hooks)
		return ERR_PTR(-EINVAL);

	ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
	     hook_mask >>= 1, ++hooknum) {
		if (!(hook_mask & 1))
			continue;
		ops[i].hook     = fn;
		ops[i].pf       = table->af;
		ops[i].hooknum  = hooknum;
		ops[i].priority = table->priority;
		++i;
	}

	return ops;
}
EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
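
/*
 * Example (hypothetical sketch): a table provider allocates the ops
 * once and registers them per netns, one nf_hook_ops per bit set in
 * valid_hooks:
 *
 *	ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
 *	if (IS_ERR(ops))
 *		return PTR_ERR(ops);
 *	...
 *	err = nf_register_net_hooks(net, ops,
 *				    hweight32(table->valid_hooks));
 */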

int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
	char buf[XT_FUNCTION_MAXNAMELEN];
	struct proc_dir_entry *proc;
	kuid_t root_uid;
	kgid_t root_gid;
#endif

	if (af >= ARRAY_SIZE(xt_prefix))
		return -EINVAL;

#ifdef CONFIG_PROC_FS
	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TABLES, sizeof(buf));
	proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops,
			sizeof(struct seq_net_private),
			(void *)(unsigned long)af);
	if (!proc)
		goto out;
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
	proc = proc_create_seq_private(buf, 0440, net->proc_net,
			&xt_match_seq_ops, sizeof(struct nf_mttg_trav),
			(void *)(unsigned long)af);
	if (!proc)
		goto out_remove_tables;
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
	proc = proc_create_seq_private(buf, 0440, net->proc_net,
			 &xt_target_seq_ops, sizeof(struct nf_mttg_trav),
			 (void *)(unsigned long)af);
	if (!proc)
		goto out_remove_matches;
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
#endif

	return 0;

#ifdef CONFIG_PROC_FS
out_remove_matches:
	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);

out_remove_tables:
	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TABLES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);
out:
	return -1;
#endif
}
EXPORT_SYMBOL_GPL(xt_proto_init);

void xt_proto_fini(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
	char buf[XT_FUNCTION_MAXNAMELEN];

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TABLES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);
#endif /*CONFIG_PROC_FS*/
}
EXPORT_SYMBOL_GPL(xt_proto_fini);

/**
 * xt_percpu_counter_alloc - allocate x_tables rule counter
 *
 * @state: pointer to xt_percpu allocation state
 * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
 *
 * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
 * contain the address of the real (percpu) counter.
 *
 * Rule evaluation needs to use the xt_get_this_cpu_counter() helper
 * to fetch the real percpu counter.
 *
 * To speed up allocation and improve data locality, a 4KB block is
 * allocated.  Freeing any counter may free an entire block, so all
 * counters allocated using the same state must be freed at the same
 * time.
 *
 * xt_percpu_counter_alloc_state contains the base address of the
 * allocated page and the current sub-offset.
 *
 * Returns false on error.
 */
bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
			     struct xt_counters *counter)
{
	BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));

	if (nr_cpu_ids <= 1)
		return true;

	if (!state->mem) {
		state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
					    XT_PCPU_BLOCK_SIZE);
		if (!state->mem)
			return false;
	}
	counter->pcnt = (__force unsigned long)(state->mem + state->off);
	state->off += sizeof(*counter);
	if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
		state->mem = NULL;
		state->off = 0;
	}
	return true;
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);

void xt_percpu_counter_free(struct xt_counters *counters)
{
	unsigned long pcnt = counters->pcnt;

	if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
		free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
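
/*
 * Example (sketch of the find_check_entry()/cleanup_entry() pairing in
 * the per-family code): one allocation state is threaded through all
 * rules at table setup; on teardown each counter is freed, and freeing
 * the first counter of a block releases the whole percpu block:
 *
 *	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 *
 *	if (!xt_percpu_counter_alloc(&alloc_state, &e->counters))
 *		return -ENOMEM;
 *	...
 *	xt_percpu_counter_free(&e->counters);
 */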

static int __net_init xt_net_init(struct net *net)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
	int i;

	for (i = 0; i < NFPROTO_NUMPROTO; i++)
		INIT_LIST_HEAD(&xt_net->tables[i]);
	return 0;
}

static void __net_exit xt_net_exit(struct net *net)
{
	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
	int i;

	for (i = 0; i < NFPROTO_NUMPROTO; i++)
		WARN_ON_ONCE(!list_empty(&xt_net->tables[i]));
}

static struct pernet_operations xt_net_ops = {
	.init = xt_net_init,
	.exit = xt_net_exit,
	.id   = &xt_pernet_id,
	.size = sizeof(struct xt_pernet),
};

static int __init xt_init(void)
{
	unsigned int i;
	int rv;

	for_each_possible_cpu(i) {
		seqcount_init(&per_cpu(xt_recseq, i));
	}

	xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
	if (!xt)
		return -ENOMEM;

	for (i = 0; i < NFPROTO_NUMPROTO; i++) {
		mutex_init(&xt[i].mutex);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
		mutex_init(&xt[i].compat_mutex);
		xt[i].compat_tab = NULL;
#endif
		INIT_LIST_HEAD(&xt[i].target);
		INIT_LIST_HEAD(&xt[i].match);
	}
	rv = register_pernet_subsys(&xt_net_ops);
	if (rv < 0)
		kfree(xt);
	return rv;
}

static void __exit xt_fini(void)
{
	unregister_pernet_subsys(&xt_net_ops);
	kfree(xt);
}

module_init(xt_init);
module_exit(xt_fini);