// SPDX-License-Identifier: GPL-2.0-only
/*
 * x_tables core - Backend for {ip,ip6,arp}_tables
 *
 * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
 * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
 *
 * Based on existing ip_tables code which is
 *   Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 *   Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_arp/arp_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");

#define XT_PCPU_BLOCK_SIZE 4096
#define XT_MAX_TABLE_SIZE	(512 * 1024 * 1024)

struct compat_delta {
	unsigned int offset; /* offset in kernel */
	int delta; /* delta in 32bit user land */
};
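
/*
 * Example (illustrative numbers): if each of the first three rules is
 * 8 bytes larger in the kernel layout than in the 32-bit user layout,
 * xt_compat_add_offset() below stores the running sums 8, 16 and 24 in
 * compat_tab[].delta, so one lookup yields the total adjustment for
 * any given offset.
 */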

struct xt_af {
	struct mutex mutex;
	struct list_head match;
	struct list_head target;
#ifdef CONFIG_COMPAT
	struct mutex compat_mutex;
	struct compat_delta *compat_tab;
	unsigned int number; /* number of slots in compat_tab[] */
	unsigned int cur; /* number of used slots in compat_tab[] */
#endif
};

static struct xt_af *xt;

static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
	[NFPROTO_UNSPEC] = "x",
	[NFPROTO_IPV4]   = "ip",
	[NFPROTO_ARP]    = "arp",
	[NFPROTO_BRIDGE] = "eb",
	[NFPROTO_IPV6]   = "ip6",
};

/* Registration hooks for targets. */
int xt_register_target(struct xt_target *target)
{
	u_int8_t af = target->family;

	mutex_lock(&xt[af].mutex);
	list_add(&target->list, &xt[af].target);
	mutex_unlock(&xt[af].mutex);
	return 0;
}
EXPORT_SYMBOL(xt_register_target);

void
xt_unregister_target(struct xt_target *target)
{
	u_int8_t af = target->family;

	mutex_lock(&xt[af].mutex);
	list_del(&target->list);
	mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_target);

int
xt_register_targets(struct xt_target *target, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = xt_register_target(&target[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		xt_unregister_targets(target, i);
	return err;
}
EXPORT_SYMBOL(xt_register_targets);

void
xt_unregister_targets(struct xt_target *target, unsigned int n)
{
	while (n-- > 0)
		xt_unregister_target(&target[n]);
}
EXPORT_SYMBOL(xt_unregister_targets);
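
/*
 * Typical usage from an extension module (illustrative sketch only;
 * the "FOO" names below are made up):
 *
 *	static struct xt_target foo_tg_reg[] __read_mostly = {
 *		{
 *			.name       = "FOO",
 *			.revision   = 0,
 *			.family     = NFPROTO_IPV4,
 *			.target     = foo_tg,
 *			.targetsize = sizeof(struct xt_foo_info),
 *			.me         = THIS_MODULE,
 *		},
 *	};
 *
 *	static int __init foo_tg_init(void)
 *	{
 *		return xt_register_targets(foo_tg_reg, ARRAY_SIZE(foo_tg_reg));
 *	}
 */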

int xt_register_match(struct xt_match *match)
{
	u_int8_t af = match->family;

	mutex_lock(&xt[af].mutex);
	list_add(&match->list, &xt[af].match);
	mutex_unlock(&xt[af].mutex);
	return 0;
}
EXPORT_SYMBOL(xt_register_match);

void
xt_unregister_match(struct xt_match *match)
{
	u_int8_t af = match->family;

	mutex_lock(&xt[af].mutex);
	list_del(&match->list);
	mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_match);

int
xt_register_matches(struct xt_match *match, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = xt_register_match(&match[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		xt_unregister_matches(match, i);
	return err;
}
EXPORT_SYMBOL(xt_register_matches);

void
xt_unregister_matches(struct xt_match *match, unsigned int n)
{
	while (n-- > 0)
		xt_unregister_match(&match[n]);
}
EXPORT_SYMBOL(xt_unregister_matches);


/*
 * These are weird, but module loading must not be done with the mutex
 * held (since the newly loaded modules will register themselves), and
 * we have to have a single function to use.
 */

/* Find match, grabs ref.  Returns ERR_PTR() on error. */
struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
{
	struct xt_match *m;
	int err = -ENOENT;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(m, &xt[af].match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision == revision) {
				if (try_module_get(m->me)) {
					mutex_unlock(&xt[af].mutex);
					return m;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	mutex_unlock(&xt[af].mutex);

	if (af != NFPROTO_UNSPEC)
		/* Try searching again in the family-independent list */
		return xt_find_match(NFPROTO_UNSPEC, name, revision);

	return ERR_PTR(err);
}
EXPORT_SYMBOL(xt_find_match);

struct xt_match *
xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
{
	struct xt_match *match;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	match = xt_find_match(nfproto, name, revision);
	if (IS_ERR(match)) {
		request_module("%st_%s", xt_prefix[nfproto], name);
		match = xt_find_match(nfproto, name, revision);
	}

	return match;
}
EXPORT_SYMBOL_GPL(xt_request_find_match);

/* Find target, grabs ref.  Returns ERR_PTR() on error. */
static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
{
	struct xt_target *t;
	int err = -ENOENT;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &xt[af].target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision == revision) {
				if (try_module_get(t->me)) {
					mutex_unlock(&xt[af].mutex);
					return t;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	mutex_unlock(&xt[af].mutex);

	if (af != NFPROTO_UNSPEC)
		/* Try searching again in the family-independent list */
		return xt_find_target(NFPROTO_UNSPEC, name, revision);

	return ERR_PTR(err);
}

struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
{
	struct xt_target *target;

	if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
		return ERR_PTR(-EINVAL);

	target = xt_find_target(af, name, revision);
	if (IS_ERR(target)) {
		request_module("%st_%s", xt_prefix[af], name);
		target = xt_find_target(af, name, revision);
	}

	return target;
}
EXPORT_SYMBOL_GPL(xt_request_find_target);


static int xt_obj_to_user(u16 __user *psize, u16 size,
			  void __user *pname, const char *name,
			  u8 __user *prev, u8 rev)
{
	if (put_user(size, psize))
		return -EFAULT;
	if (copy_to_user(pname, name, strlen(name) + 1))
		return -EFAULT;
	if (put_user(rev, prev))
		return -EFAULT;

	return 0;
}

#define XT_OBJ_TO_USER(U, K, TYPE, C_SIZE)				\
	xt_obj_to_user(&U->u.TYPE##_size, C_SIZE ? : K->u.TYPE##_size,	\
		       U->u.user.name, K->u.kernel.TYPE->name,		\
		       &U->u.user.revision, K->u.kernel.TYPE->revision)

int xt_data_to_user(void __user *dst, const void *src,
		    int usersize, int size, int aligned_size)
{
	usersize = usersize ? : size;
	if (copy_to_user(dst, src, usersize))
		return -EFAULT;
	if (usersize != aligned_size &&
	    clear_user(dst + usersize, aligned_size - usersize))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL_GPL(xt_data_to_user);

#define XT_DATA_TO_USER(U, K, TYPE)					\
	xt_data_to_user(U->data, K->data,				\
			K->u.kernel.TYPE->usersize,			\
			K->u.kernel.TYPE->TYPE##size,			\
			XT_ALIGN(K->u.kernel.TYPE->TYPE##size))

int xt_match_to_user(const struct xt_entry_match *m,
		     struct xt_entry_match __user *u)
{
	return XT_OBJ_TO_USER(u, m, match, 0) ||
	       XT_DATA_TO_USER(u, m, match);
}
EXPORT_SYMBOL_GPL(xt_match_to_user);

int xt_target_to_user(const struct xt_entry_target *t,
		      struct xt_entry_target __user *u)
{
	return XT_OBJ_TO_USER(u, t, target, 0) ||
	       XT_DATA_TO_USER(u, t, target);
}
EXPORT_SYMBOL_GPL(xt_target_to_user);

static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
	const struct xt_match *m;
	int have_rev = 0;

	list_for_each_entry(m, &xt[af].match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision > *bestp)
				*bestp = m->revision;
			if (m->revision == revision)
				have_rev = 1;
		}
	}

	if (af != NFPROTO_UNSPEC && !have_rev)
		return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);

	return have_rev;
}

static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
	const struct xt_target *t;
	int have_rev = 0;

	list_for_each_entry(t, &xt[af].target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision > *bestp)
				*bestp = t->revision;
			if (t->revision == revision)
				have_rev = 1;
		}
	}

	if (af != NFPROTO_UNSPEC && !have_rev)
		return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);

	return have_rev;
}

/* Returns true or false (if no such extension at all) */
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
		     int *err)
{
	int have_rev, best = -1;

	mutex_lock(&xt[af].mutex);
	if (target == 1)
		have_rev = target_revfn(af, name, revision, &best);
	else
		have_rev = match_revfn(af, name, revision, &best);
	mutex_unlock(&xt[af].mutex);

	/* Nothing at all?  Return 0 to try loading module. */
	if (best == -1) {
		*err = -ENOENT;
		return 0;
	}

	*err = best;
	if (!have_rev)
		*err = -EPROTONOSUPPORT;
	return 1;
}
EXPORT_SYMBOL_GPL(xt_find_revision);

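/*
 * Render the hook bitmask @mask as a '/'-separated list of hook names
 * into @buf, e.g. a mask of 0x5 yields "PREROUTING/FORWARD" for the
 * inet/bridge families.
 */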
static char *
textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
{
	static const char *const inetbr_names[] = {
		"PREROUTING", "INPUT", "FORWARD",
		"OUTPUT", "POSTROUTING", "BROUTING",
	};
	static const char *const arp_names[] = {
		"INPUT", "FORWARD", "OUTPUT",
	};
	const char *const *names;
	unsigned int i, max;
	char *p = buf;
	bool np = false;
	int res;

	names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
	max   = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
	                                   ARRAY_SIZE(inetbr_names);
	*p = '\0';
	for (i = 0; i < max; ++i) {
		if (!(mask & (1 << i)))
			continue;
		res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
		if (res > 0) {
			size -= res;
			p += res;
		}
		np = true;
	}

	return buf;
}

/**
 * xt_check_proc_name - check that name is suitable for /proc file creation
 *
 * @name: file name candidate
 * @size: length of buffer
 *
 * Some x_tables modules wish to create a file in /proc.
 * This function makes sure that the name is suitable for this
 * purpose: it checks that name is NUL terminated and isn't a 'special'
 * name, like "." or "..".
 *
 * Returns a negative number on error or 0 if name is usable.
 */
int xt_check_proc_name(const char *name, unsigned int size)
{
	if (name[0] == '\0')
		return -EINVAL;

	if (strnlen(name, size) == size)
		return -ENAMETOOLONG;

	if (strcmp(name, ".") == 0 ||
	    strcmp(name, "..") == 0 ||
	    strchr(name, '/'))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL(xt_check_proc_name);

int xt_check_match(struct xt_mtchk_param *par,
		   unsigned int size, u16 proto, bool inv_proto)
{
	int ret;

	if (XT_ALIGN(par->match->matchsize) != size &&
	    par->match->matchsize != -1) {
		/*
		 * ebt_among is exempt from centralized matchsize checking
		 * because it uses a dynamic-size data set.
		 */
		pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n",
				   xt_prefix[par->family], par->match->name,
				   par->match->revision,
				   XT_ALIGN(par->match->matchsize), size);
		return -EINVAL;
	}
	if (par->match->table != NULL &&
	    strcmp(par->match->table, par->table) != 0) {
		pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n",
				    xt_prefix[par->family], par->match->name,
				    par->match->table, par->table);
		return -EINVAL;
	}
	if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
		char used[64], allow[64];

		pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n",
				    xt_prefix[par->family], par->match->name,
				    textify_hooks(used, sizeof(used),
						  par->hook_mask, par->family),
				    textify_hooks(allow, sizeof(allow),
						  par->match->hooks,
						  par->family));
		return -EINVAL;
	}
	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
		pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n",
				    xt_prefix[par->family], par->match->name,
				    par->match->proto);
		return -EINVAL;
	}
	if (par->match->checkentry != NULL) {
		ret = par->match->checkentry(par);
		if (ret < 0)
			return ret;
		else if (ret > 0)
			/* Flag up potential errors. */
			return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(xt_check_match);

/** xt_check_entry_match - check that matches end before start of target
 *
 * @match: beginning of xt_entry_match
 * @target: beginning of this rule's target (alleged end of matches)
 * @alignment: alignment requirement of match structures
 *
 * Validates that all matches add up to the beginning of the target,
 * and that each match covers at least the base structure size.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int xt_check_entry_match(const char *match, const char *target,
				const size_t alignment)
{
	const struct xt_entry_match *pos;
	int length = target - match;

	if (length == 0) /* no matches */
		return 0;

	pos = (struct xt_entry_match *)match;
	do {
		if ((unsigned long)pos % alignment)
			return -EINVAL;

		if (length < (int)sizeof(struct xt_entry_match))
			return -EINVAL;

		if (pos->u.match_size < sizeof(struct xt_entry_match))
			return -EINVAL;

		if (pos->u.match_size > length)
			return -EINVAL;

		length -= pos->u.match_size;
		pos = ((void *)((char *)(pos) + (pos)->u.match_size));
	} while (length > 0);

	return 0;
}

/** xt_check_table_hooks - check hook entry points are sane
 *
 * @info: xt_table_info to check
 * @valid_hooks: hook entry points that we can enter from
 *
 * Validates that the hook entry and underflow points are set up.
 *
 * Return: 0 on success, negative errno on failure.
 */
int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks)
{
	const char *err = "unsorted underflow";
	unsigned int i, max_uflow, max_entry;
	bool check_hooks = false;

	BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow));

	max_entry = 0;
	max_uflow = 0;

	for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) {
		if (!(valid_hooks & (1 << i)))
			continue;

		if (info->hook_entry[i] == 0xFFFFFFFF)
			return -EINVAL;
		if (info->underflow[i] == 0xFFFFFFFF)
			return -EINVAL;

		if (check_hooks) {
			if (max_uflow > info->underflow[i])
				goto error;

			if (max_uflow == info->underflow[i]) {
				err = "duplicate underflow";
				goto error;
			}
			if (max_entry > info->hook_entry[i]) {
				err = "unsorted entry";
				goto error;
			}
			if (max_entry == info->hook_entry[i]) {
				err = "duplicate entry";
				goto error;
			}
		}
		max_entry = info->hook_entry[i];
		max_uflow = info->underflow[i];
		check_hooks = true;
	}

	return 0;
error:
	pr_err_ratelimited("%s at hook %d\n", err, i);
	return -EINVAL;
}
EXPORT_SYMBOL(xt_check_table_hooks);

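/*
 * Standard-target verdicts are encoded as follows (the -NF_<x> - 1
 * convention comes from uapi/linux/netfilter/x_tables.h):
 *   > 0:  absolute jump offset into the rule blob
 *   < 0:  built-in verdict, e.g. NF_DROP (0) is stored as -1 and
 *         NF_ACCEPT (1) as -2; XT_RETURN pops the rule call stack
 *   == 0: never valid
 */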
static bool verdict_ok(int verdict)
{
	if (verdict > 0)
		return true;

	if (verdict < 0) {
		int v = -verdict - 1;

		if (verdict == XT_RETURN)
			return true;

		switch (v) {
		case NF_ACCEPT: return true;
		case NF_DROP: return true;
		case NF_QUEUE: return true;
		default:
			break;
		}

		return false;
	}

	return false;
}

static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
			const char *msg, unsigned int msglen)
{
	return usersize == kernsize && strnlen(msg, msglen) < msglen;
}

#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
	struct xt_af *xp = &xt[af];

	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));

	if (WARN_ON(!xp->compat_tab))
		return -ENOMEM;

	if (xp->cur >= xp->number)
		return -EINVAL;

	if (xp->cur)
		delta += xp->compat_tab[xp->cur - 1].delta;
	xp->compat_tab[xp->cur].offset = offset;
	xp->compat_tab[xp->cur].delta = delta;
	xp->cur++;
	return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_add_offset);

void xt_compat_flush_offsets(u_int8_t af)
{
	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));

	if (xt[af].compat_tab) {
		vfree(xt[af].compat_tab);
		xt[af].compat_tab = NULL;
		xt[af].number = 0;
		xt[af].cur = 0;
	}
}
EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);

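/*
 * Binary-search compat_tab[] and return the accumulated size delta of
 * all entries located before @offset; callers use this to translate
 * rule offsets between the 32-bit user layout and the kernel layout.
 */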
int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
{
	struct compat_delta *tmp = xt[af].compat_tab;
	int mid, left = 0, right = xt[af].cur - 1;

	while (left <= right) {
		mid = (left + right) >> 1;
		if (offset > tmp[mid].offset)
			left = mid + 1;
		else if (offset < tmp[mid].offset)
			right = mid - 1;
		else
			return mid ? tmp[mid - 1].delta : 0;
	}
	return left ? tmp[left - 1].delta : 0;
}
EXPORT_SYMBOL_GPL(xt_compat_calc_jump);

int xt_compat_init_offsets(u8 af, unsigned int number)
{
	size_t mem;

	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));

	if (!number || number > (INT_MAX / sizeof(struct compat_delta)))
		return -EINVAL;

	if (WARN_ON(xt[af].compat_tab))
		return -EINVAL;

	mem = sizeof(struct compat_delta) * number;
	if (mem > XT_MAX_TABLE_SIZE)
		return -ENOMEM;

	xt[af].compat_tab = vmalloc(mem);
	if (!xt[af].compat_tab)
		return -ENOMEM;

	xt[af].number = number;
	xt[af].cur = 0;

	return 0;
}
EXPORT_SYMBOL(xt_compat_init_offsets);

int xt_compat_match_offset(const struct xt_match *match)
{
	u_int16_t csize = match->compatsize ? : match->matchsize;
	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);

void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
			       unsigned int *size)
{
	const struct xt_match *match = m->u.kernel.match;
	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
	int pad, off = xt_compat_match_offset(match);
	u_int16_t msize = cm->u.user.match_size;
	char name[sizeof(m->u.user.name)];

	m = *dstptr;
	memcpy(m, cm, sizeof(*cm));
	if (match->compat_from_user)
		match->compat_from_user(m->data, cm->data);
	else
		memcpy(m->data, cm->data, msize - sizeof(*cm));
	pad = XT_ALIGN(match->matchsize) - match->matchsize;
	if (pad > 0)
		memset(m->data + match->matchsize, 0, pad);

	msize += off;
	m->u.user.match_size = msize;
	strlcpy(name, match->name, sizeof(name));
	module_put(match->me);
	strncpy(m->u.user.name, name, sizeof(m->u.user.name));

	*size += off;
	*dstptr += msize;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);

#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE)			\
	xt_data_to_user(U->data, K->data,				\
			K->u.kernel.TYPE->usersize,			\
			C_SIZE,						\
			COMPAT_XT_ALIGN(C_SIZE))

int xt_compat_match_to_user(const struct xt_entry_match *m,
			    void __user **dstptr, unsigned int *size)
{
	const struct xt_match *match = m->u.kernel.match;
	struct compat_xt_entry_match __user *cm = *dstptr;
	int off = xt_compat_match_offset(match);
	u_int16_t msize = m->u.user.match_size - off;

	if (XT_OBJ_TO_USER(cm, m, match, msize))
		return -EFAULT;

	if (match->compat_to_user) {
		if (match->compat_to_user((void __user *)cm->data, m->data))
			return -EFAULT;
	} else {
		if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
			return -EFAULT;
	}

	*size -= off;
	*dstptr += msize;
	return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);

/* non-compat version may have padding after verdict */
struct compat_xt_standard_target {
	struct compat_xt_entry_target t;
	compat_uint_t verdict;
};

struct compat_xt_error_target {
	struct compat_xt_entry_target t;
	char errorname[XT_FUNCTION_MAXNAMELEN];
};

int xt_compat_check_entry_offsets(const void *base, const char *elems,
				  unsigned int target_offset,
				  unsigned int next_offset)
{
	long size_of_base_struct = elems - (const char *)base;
	const struct compat_xt_entry_target *t;
	const char *e = base;

	if (target_offset < size_of_base_struct)
		return -EINVAL;

	if (target_offset + sizeof(*t) > next_offset)
		return -EINVAL;

	t = (void *)(e + target_offset);
	if (t->u.target_size < sizeof(*t))
		return -EINVAL;

	if (target_offset + t->u.target_size > next_offset)
		return -EINVAL;

	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
		const struct compat_xt_standard_target *st = (const void *)t;

		if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
			return -EINVAL;

		if (!verdict_ok(st->verdict))
			return -EINVAL;
	} else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
		const struct compat_xt_error_target *et = (const void *)t;

		if (!error_tg_ok(t->u.target_size, sizeof(*et),
				 et->errorname, sizeof(et->errorname)))
			return -EINVAL;
	}

	/* The compat_xt_entry_match structure has less strict alignment
	 * requirements but is otherwise identical to xt_entry_match.  In
	 * case of padding differences we would need to add a compat
	 * version of xt_check_entry_match.
	 */
	BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));

	return xt_check_entry_match(elems, base + target_offset,
				    __alignof__(struct compat_xt_entry_match));
}
EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_COMPAT */

/**
 * xt_check_entry_offsets - validate arp/ip/ip6t_entry
 *
 * @base: pointer to arp/ip/ip6t_entry
 * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
 * @target_offset: the arp/ip(6)t_entry->target_offset
 * @next_offset: the arp/ip(6)t_entry->next_offset
 *
 * Validates that target_offset and next_offset are sane and that all
 * match sizes (if any) align with the target offset.
 *
 * This function does not validate the targets or matches themselves, it
 * only tests that all the offsets and sizes are correct, that all
 * match structures are aligned, and that the last structure ends where
 * the target structure begins.
 *
 * Also see xt_compat_check_entry_offsets() for the CONFIG_COMPAT version.
 *
 * The arp/ip/ip6t_entry structure @base must have passed the following tests:
 * - it must point to a valid memory location
 * - base to base + next_offset must be accessible, i.e. not exceed allocated
 *   length.
 *
 * A well-formed entry looks like this:
 *
 * ip(6)t_entry   match [mtdata]  match [mtdata] target [tgdata] ip(6)t_entry
 * e->elems[]-----'                              |               |
 *                matchsize                      |               |
 *                                matchsize      |               |
 *                                               |               |
 * target_offset---------------------------------'               |
 * next_offset---------------------------------------------------'
 *
 * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
 *          This is where matches (if any) and the target reside.
 * target_offset: beginning of target.
 * next_offset: start of the next rule; also: size of this rule.
 * Since targets have a minimum size, target_offset + minlen <= next_offset.
 *
 * Every match stores its size; the sum of sizes must not exceed target_offset.
 *
 * Return: 0 on success, negative errno on failure.
 */
int xt_check_entry_offsets(const void *base,
			   const char *elems,
			   unsigned int target_offset,
			   unsigned int next_offset)
{
	long size_of_base_struct = elems - (const char *)base;
	const struct xt_entry_target *t;
	const char *e = base;

	/* target start is within the ip/ip6/arpt_entry struct */
	if (target_offset < size_of_base_struct)
		return -EINVAL;

	if (target_offset + sizeof(*t) > next_offset)
		return -EINVAL;

	t = (void *)(e + target_offset);
	if (t->u.target_size < sizeof(*t))
		return -EINVAL;

	if (target_offset + t->u.target_size > next_offset)
		return -EINVAL;

	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
		const struct xt_standard_target *st = (const void *)t;

		if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
			return -EINVAL;

		if (!verdict_ok(st->verdict))
			return -EINVAL;
	} else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
		const struct xt_error_target *et = (const void *)t;

		if (!error_tg_ok(t->u.target_size, sizeof(*et),
				 et->errorname, sizeof(et->errorname)))
			return -EINVAL;
	}

	return xt_check_entry_match(elems, base + target_offset,
				    __alignof__(struct xt_entry_match));
}
EXPORT_SYMBOL(xt_check_entry_offsets);

/**
 * xt_alloc_entry_offsets - allocate array to store rule head offsets
 *
 * @size: number of entries
 *
 * Return: NULL or zeroed kmalloc'd or vmalloc'd array
 */
unsigned int *xt_alloc_entry_offsets(unsigned int size)
{
	if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int))
		return NULL;

	return kvcalloc(size, sizeof(unsigned int), GFP_KERNEL);
}
EXPORT_SYMBOL(xt_alloc_entry_offsets);

/**
 * xt_find_jump_offset - check if target is a valid jump offset
 *
 * @offsets: array containing all valid rule start offsets of a rule blob
 * @target: the jump target to search for
 * @size: number of entries in @offsets
 */
bool xt_find_jump_offset(const unsigned int *offsets,
			 unsigned int target, unsigned int size)
{
	int m, low = 0, hi = size;

	while (hi > low) {
		m = (low + hi) / 2u;

		if (offsets[m] > target)
			hi = m;
		else if (offsets[m] < target)
			low = m + 1;
		else
			return true;
	}

	return false;
}
EXPORT_SYMBOL(xt_find_jump_offset);

int xt_check_target(struct xt_tgchk_param *par,
		    unsigned int size, u16 proto, bool inv_proto)
{
	int ret;

	if (XT_ALIGN(par->target->targetsize) != size) {
		pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
				   xt_prefix[par->family], par->target->name,
				   par->target->revision,
				   XT_ALIGN(par->target->targetsize), size);
		return -EINVAL;
	}
	if (par->target->table != NULL &&
	    strcmp(par->target->table, par->table) != 0) {
		pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n",
				    xt_prefix[par->family], par->target->name,
				    par->target->table, par->table);
		return -EINVAL;
	}
	if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
		char used[64], allow[64];

		pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n",
				    xt_prefix[par->family], par->target->name,
				    textify_hooks(used, sizeof(used),
						  par->hook_mask, par->family),
				    textify_hooks(allow, sizeof(allow),
						  par->target->hooks,
						  par->family));
		return -EINVAL;
	}
	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
		pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n",
				    xt_prefix[par->family], par->target->name,
				    par->target->proto);
		return -EINVAL;
	}
	if (par->target->checkentry != NULL) {
		ret = par->target->checkentry(par);
		if (ret < 0)
			return ret;
		else if (ret > 0)
			/* Flag up potential errors. */
			return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(xt_check_target);

/**
 * xt_copy_counters - copy counters and metadata from a sockptr_t
 *
 * @arg: src sockptr
 * @len: alleged size of userspace memory
 * @info: where to store the xt_counters_info metadata
 *
 * Copies the counter metadata from @arg and stores it in @info.
 *
 * vmallocs memory to hold the counters, then copies the counter data
 * from @arg to the new memory and returns a pointer to it.
 *
 * If called from a compat syscall, @info gets converted automatically to the
 * 64bit representation.
 *
 * Return: returns pointer that caller has to test via IS_ERR().
 * If IS_ERR is false, caller has to vfree the pointer.
 */
void *xt_copy_counters(sockptr_t arg, unsigned int len,
		       struct xt_counters_info *info)
{
	size_t offset;
	void *mem;
	u64 size;

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		/* structures only differ in size due to alignment */
		struct compat_xt_counters_info compat_tmp;

		if (len <= sizeof(compat_tmp))
			return ERR_PTR(-EINVAL);

		len -= sizeof(compat_tmp);
		if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0)
			return ERR_PTR(-EFAULT);

		memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
		info->num_counters = compat_tmp.num_counters;
		offset = sizeof(compat_tmp);
	} else
#endif
	{
		if (len <= sizeof(*info))
			return ERR_PTR(-EINVAL);

		len -= sizeof(*info);
		if (copy_from_sockptr(info, arg, sizeof(*info)) != 0)
			return ERR_PTR(-EFAULT);

		offset = sizeof(*info);
	}
	info->name[sizeof(info->name) - 1] = '\0';

	size = sizeof(struct xt_counters);
	size *= info->num_counters;

	if (size != (u64)len)
		return ERR_PTR(-EINVAL);

	mem = vmalloc(len);
	if (!mem)
		return ERR_PTR(-ENOMEM);

	if (copy_from_sockptr_offset(mem, arg, offset, len) == 0)
		return mem;

	vfree(mem);
	return ERR_PTR(-EFAULT);
}
EXPORT_SYMBOL_GPL(xt_copy_counters);
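
/*
 * Illustrative caller pattern (cf. do_add_counters() in ip_tables.c):
 *
 *	struct xt_counters_info tmp;
 *	struct xt_counters *paddc;
 *
 *	paddc = xt_copy_counters(arg, len, &tmp);
 *	if (IS_ERR(paddc))
 *		return PTR_ERR(paddc);
 *	...
 *	vfree(paddc);
 */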

#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
	u_int16_t csize = target->compatsize ? : target->targetsize;
	return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
}
EXPORT_SYMBOL_GPL(xt_compat_target_offset);

void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
				unsigned int *size)
{
	const struct xt_target *target = t->u.kernel.target;
	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
	int pad, off = xt_compat_target_offset(target);
	u_int16_t tsize = ct->u.user.target_size;
	char name[sizeof(t->u.user.name)];

	t = *dstptr;
	memcpy(t, ct, sizeof(*ct));
	if (target->compat_from_user)
		target->compat_from_user(t->data, ct->data);
	else
		memcpy(t->data, ct->data, tsize - sizeof(*ct));
	pad = XT_ALIGN(target->targetsize) - target->targetsize;
	if (pad > 0)
		memset(t->data + target->targetsize, 0, pad);

	tsize += off;
	t->u.user.target_size = tsize;
	strlcpy(name, target->name, sizeof(name));
	module_put(target->me);
	strncpy(t->u.user.name, name, sizeof(t->u.user.name));

	*size += off;
	*dstptr += tsize;
}
EXPORT_SYMBOL_GPL(xt_compat_target_from_user);

int xt_compat_target_to_user(const struct xt_entry_target *t,
			     void __user **dstptr, unsigned int *size)
{
	const struct xt_target *target = t->u.kernel.target;
	struct compat_xt_entry_target __user *ct = *dstptr;
	int off = xt_compat_target_offset(target);
	u_int16_t tsize = t->u.user.target_size - off;

	if (XT_OBJ_TO_USER(ct, t, target, tsize))
		return -EFAULT;

	if (target->compat_to_user) {
		if (target->compat_to_user((void __user *)ct->data, t->data))
			return -EFAULT;
	} else {
		if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
			return -EFAULT;
	}

	*size -= off;
	*dstptr += tsize;
	return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif

struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
	struct xt_table_info *info = NULL;
	size_t sz = sizeof(*info) + size;

	if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)
		return NULL;

	info = kvmalloc(sz, GFP_KERNEL_ACCOUNT);
	if (!info)
		return NULL;

	memset(info, 0, sizeof(*info));
	info->size = size;
	return info;
}
EXPORT_SYMBOL(xt_alloc_table_info);

void xt_free_table_info(struct xt_table_info *info)
{
	int cpu;

	if (info->jumpstack != NULL) {
		for_each_possible_cpu(cpu)
			kvfree(info->jumpstack[cpu]);
		kvfree(info->jumpstack);
	}

	kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);

/* Find table by name, grabs mutex & ref.  Returns ERR_PTR on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
				    const char *name)
{
	struct xt_table *t, *found = NULL;

	mutex_lock(&xt[af].mutex);
	list_for_each_entry(t, &net->xt.tables[af], list)
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;

	if (net == &init_net)
		goto out;

	/* Table doesn't exist in this netns, re-try init */
	list_for_each_entry(t, &init_net.xt.tables[af], list) {
		int err;

		if (strcmp(t->name, name))
			continue;
		if (!try_module_get(t->me))
			goto out;
		mutex_unlock(&xt[af].mutex);
		err = t->table_init(net);
		if (err < 0) {
			module_put(t->me);
			return ERR_PTR(err);
		}

		found = t;

		mutex_lock(&xt[af].mutex);
		break;
	}

	if (!found)
		goto out;

	/* and once again: */
	list_for_each_entry(t, &net->xt.tables[af], list)
		if (strcmp(t->name, name) == 0)
			return t;

	module_put(found->me);
 out:
	mutex_unlock(&xt[af].mutex);
	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(xt_find_table_lock);

struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
					    const char *name)
{
	struct xt_table *t = xt_find_table_lock(net, af, name);

#ifdef CONFIG_MODULES
	if (IS_ERR(t)) {
		int err = request_module("%stable_%s", xt_prefix[af], name);
		if (err < 0)
			return ERR_PTR(err);
		t = xt_find_table_lock(net, af, name);
	}
#endif

	return t;
}
EXPORT_SYMBOL_GPL(xt_request_find_table_lock);

void xt_table_unlock(struct xt_table *table)
{
	mutex_unlock(&xt[table->af].mutex);
}
EXPORT_SYMBOL_GPL(xt_table_unlock);

#ifdef CONFIG_COMPAT
void xt_compat_lock(u_int8_t af)
{
	mutex_lock(&xt[af].compat_mutex);
}
EXPORT_SYMBOL_GPL(xt_compat_lock);

void xt_compat_unlock(u_int8_t af)
{
	mutex_unlock(&xt[af].compat_mutex);
}
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif

DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);

struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);

static int xt_jumpstack_alloc(struct xt_table_info *i)
{
	unsigned int size;
	int cpu;

	size = sizeof(void **) * nr_cpu_ids;
	if (size > PAGE_SIZE)
		i->jumpstack = kvzalloc(size, GFP_KERNEL);
	else
		i->jumpstack = kzalloc(size, GFP_KERNEL);
	if (i->jumpstack == NULL)
		return -ENOMEM;

	/* ruleset without jumps -- no stack needed */
	if (i->stacksize == 0)
		return 0;

	/* Jumpstack needs to be able to record two full callchains, one
	 * from the first rule set traversal, plus one table reentrancy
	 * via -j TEE without clobbering the callchain that brought us to
	 * the TEE target.
	 *
	 * This is done by allocating two jumpstacks per cpu; on reentry
	 * the upper half of the stack is used.
	 *
	 * See the jumpstack setup in ipt_do_table() for more details.
	 */
	size = sizeof(void *) * i->stacksize * 2u;
	for_each_possible_cpu(cpu) {
		i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL,
			cpu_to_node(cpu));
		if (i->jumpstack[cpu] == NULL)
			/*
			 * Freeing will be done later on by the callers. The
			 * chain is: xt_replace_table -> __do_replace ->
			 * do_replace -> xt_free_table_info.
			 */
			return -ENOMEM;
	}

	return 0;
}

struct xt_counters *xt_counters_alloc(unsigned int counters)
{
	struct xt_counters *mem;

	if (counters == 0 || counters > INT_MAX / sizeof(*mem))
		return NULL;

	counters *= sizeof(*mem);
	if (counters > XT_MAX_TABLE_SIZE)
		return NULL;

	return vzalloc(counters);
}
EXPORT_SYMBOL(xt_counters_alloc);

struct xt_table_info *
xt_replace_table(struct xt_table *table,
	      unsigned int num_counters,
	      struct xt_table_info *newinfo,
	      int *error)
{
	struct xt_table_info *private;
	unsigned int cpu;
	int ret;

	ret = xt_jumpstack_alloc(newinfo);
	if (ret < 0) {
		*error = ret;
		return NULL;
	}

	/* Do the substitution. */
	local_bh_disable();
	private = table->private;

	/* Check inside lock: is the old number correct? */
	if (num_counters != private->number) {
		pr_debug("num_counters != table->private->number (%u/%u)\n",
			 num_counters, private->number);
		local_bh_enable();
		*error = -EAGAIN;
		return NULL;
	}

	newinfo->initial_entries = private->initial_entries;
	/*
	 * Ensure contents of newinfo are visible before assigning to
	 * private.
	 */
	smp_wmb();
	table->private = newinfo;

	/* make sure all cpus see new ->private value */
	smp_wmb();

	/*
	 * Even though table entries have now been swapped, other CPU's
	 * may still be using the old entries...
	 */
	local_bh_enable();

	/* ... so wait for even xt_recseq on all cpus */
	for_each_possible_cpu(cpu) {
		seqcount_t *s = &per_cpu(xt_recseq, cpu);
		u32 seq = raw_read_seqcount(s);

		if (seq & 1) {
			do {
				cond_resched();
				cpu_relax();
			} while (seq == raw_read_seqcount(s));
		}
	}

	audit_log_nfcfg(table->name, table->af, private->number,
			!private->number ? AUDIT_XT_OP_REGISTER :
					   AUDIT_XT_OP_REPLACE,
			GFP_KERNEL);
	return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);

struct xt_table *xt_register_table(struct net *net,
				   const struct xt_table *input_table,
				   struct xt_table_info *bootstrap,
				   struct xt_table_info *newinfo)
{
	int ret;
	struct xt_table_info *private;
	struct xt_table *t, *table;

	/* Don't add one object to multiple lists. */
	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
	if (!table) {
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock(&xt[table->af].mutex);
	/* Don't autoload: we'd eat our tail... */
	list_for_each_entry(t, &net->xt.tables[table->af], list) {
		if (strcmp(t->name, table->name) == 0) {
			ret = -EEXIST;
			goto unlock;
		}
	}

	/* Simplifies replace_table code. */
	table->private = bootstrap;

	if (!xt_replace_table(table, 0, newinfo, &ret))
		goto unlock;

	private = table->private;
	pr_debug("table->private->number = %u\n", private->number);

	/* save number of initial entries */
	private->initial_entries = private->number;

	list_add(&table->list, &net->xt.tables[table->af]);
	mutex_unlock(&xt[table->af].mutex);
	return table;

unlock:
	mutex_unlock(&xt[table->af].mutex);
	kfree(table);
out:
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(xt_register_table);

void *xt_unregister_table(struct xt_table *table)
{
	struct xt_table_info *private;

	mutex_lock(&xt[table->af].mutex);
	private = table->private;
	list_del(&table->list);
	mutex_unlock(&xt[table->af].mutex);
	audit_log_nfcfg(table->name, table->af, private->number,
			AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
	kfree(table);

	return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);

#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));

	mutex_lock(&xt[af].mutex);
	return seq_list_start(&net->xt.tables[af], *pos);
}

static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));

	return seq_list_next(v, &net->xt.tables[af], pos);
}

static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));

	mutex_unlock(&xt[af].mutex);
}

static int xt_table_seq_show(struct seq_file *seq, void *v)
{
	struct xt_table *table = list_entry(v, struct xt_table, list);

	if (*table->name)
		seq_printf(seq, "%s\n", table->name);
	return 0;
}

static const struct seq_operations xt_table_seq_ops = {
	.start	= xt_table_seq_start,
	.next	= xt_table_seq_next,
	.stop	= xt_table_seq_stop,
	.show	= xt_table_seq_show,
};

/*
 * Traversal state for the ip{,6}_tables match/target /proc files; it
 * helps with crossing from the NFPROTO_UNSPEC mutex to the per-AF one.
 */
struct nf_mttg_trav {
	struct list_head *head, *curr;
	uint8_t class;
};

enum {
	MTTG_TRAV_INIT,
	MTTG_TRAV_NFP_UNSPEC,
	MTTG_TRAV_NFP_SPEC,
	MTTG_TRAV_DONE,
};

static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
    bool is_target)
{
	static const uint8_t next_class[] = {
		[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
		[MTTG_TRAV_NFP_SPEC]   = MTTG_TRAV_DONE,
	};
	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
	struct nf_mttg_trav *trav = seq->private;

	if (ppos != NULL)
		++(*ppos);

	switch (trav->class) {
	case MTTG_TRAV_INIT:
		trav->class = MTTG_TRAV_NFP_UNSPEC;
		mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
		trav->head = trav->curr = is_target ?
			&xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
		break;
	case MTTG_TRAV_NFP_UNSPEC:
		trav->curr = trav->curr->next;
		if (trav->curr != trav->head)
			break;
		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
		mutex_lock(&xt[nfproto].mutex);
		trav->head = trav->curr = is_target ?
			&xt[nfproto].target : &xt[nfproto].match;
		trav->class = next_class[trav->class];
		break;
	case MTTG_TRAV_NFP_SPEC:
		trav->curr = trav->curr->next;
		if (trav->curr != trav->head)
			break;
		fallthrough;
	default:
		return NULL;
	}
	return trav;
}

static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
    bool is_target)
{
	struct nf_mttg_trav *trav = seq->private;
	unsigned int j;

	trav->class = MTTG_TRAV_INIT;
	for (j = 0; j < *pos; ++j)
		if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
			return NULL;
	return trav;
}

static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
	struct nf_mttg_trav *trav = seq->private;

	switch (trav->class) {
	case MTTG_TRAV_NFP_UNSPEC:
		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
		break;
	case MTTG_TRAV_NFP_SPEC:
		mutex_unlock(&xt[nfproto].mutex);
		break;
	}
}

static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
{
	return xt_mttg_seq_start(seq, pos, false);
}

static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	return xt_mttg_seq_next(seq, v, ppos, false);
}

static int xt_match_seq_show(struct seq_file *seq, void *v)
{
	const struct nf_mttg_trav *trav = seq->private;
	const struct xt_match *match;

	switch (trav->class) {
	case MTTG_TRAV_NFP_UNSPEC:
	case MTTG_TRAV_NFP_SPEC:
		if (trav->curr == trav->head)
			return 0;
		match = list_entry(trav->curr, struct xt_match, list);
		if (*match->name)
			seq_printf(seq, "%s\n", match->name);
	}
	return 0;
}

static const struct seq_operations xt_match_seq_ops = {
	.start	= xt_match_seq_start,
	.next	= xt_match_seq_next,
	.stop	= xt_mttg_seq_stop,
	.show	= xt_match_seq_show,
};

static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
{
	return xt_mttg_seq_start(seq, pos, true);
}

static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	return xt_mttg_seq_next(seq, v, ppos, true);
}

static int xt_target_seq_show(struct seq_file *seq, void *v)
{
	const struct nf_mttg_trav *trav = seq->private;
	const struct xt_target *target;

	switch (trav->class) {
	case MTTG_TRAV_NFP_UNSPEC:
	case MTTG_TRAV_NFP_SPEC:
		if (trav->curr == trav->head)
			return 0;
		target = list_entry(trav->curr, struct xt_target, list);
		if (*target->name)
			seq_printf(seq, "%s\n", target->name);
	}
	return 0;
}

static const struct seq_operations xt_target_seq_ops = {
	.start	= xt_target_seq_start,
	.next	= xt_target_seq_next,
	.stop	= xt_mttg_seq_stop,
	.show	= xt_target_seq_show,
};

#define FORMAT_TABLES	"_tables_names"
#define FORMAT_MATCHES	"_tables_matches"
#define FORMAT_TARGETS	"_tables_targets"

#endif /* CONFIG_PROC_FS */

/**
 * xt_hook_ops_alloc - set up hooks for a new table
 * @table:	table with metadata needed to set up hooks
 * @fn:		Hook function
 *
 * This function will create the nf_hook_ops that the x_table needs
 * to hand to xt_hook_link_net().
 */
struct nf_hook_ops *
xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
{
	unsigned int hook_mask = table->valid_hooks;
	uint8_t i, num_hooks = hweight32(hook_mask);
	uint8_t hooknum;
	struct nf_hook_ops *ops;

	if (!num_hooks)
		return ERR_PTR(-EINVAL);

	ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
	     hook_mask >>= 1, ++hooknum) {
		if (!(hook_mask & 1))
			continue;
		ops[i].hook     = fn;
		ops[i].pf       = table->af;
		ops[i].hooknum  = hooknum;
		ops[i].priority = table->priority;
		++i;
	}

	return ops;
}
EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);

int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
	char buf[XT_FUNCTION_MAXNAMELEN];
	struct proc_dir_entry *proc;
	kuid_t root_uid;
	kgid_t root_gid;
#endif

	if (af >= ARRAY_SIZE(xt_prefix))
		return -EINVAL;

#ifdef CONFIG_PROC_FS
	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TABLES, sizeof(buf));
	proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops,
			sizeof(struct seq_net_private),
			(void *)(unsigned long)af);
	if (!proc)
		goto out;
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
	proc = proc_create_seq_private(buf, 0440, net->proc_net,
			&xt_match_seq_ops, sizeof(struct nf_mttg_trav),
			(void *)(unsigned long)af);
	if (!proc)
		goto out_remove_tables;
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
	proc = proc_create_seq_private(buf, 0440, net->proc_net,
			 &xt_target_seq_ops, sizeof(struct nf_mttg_trav),
			 (void *)(unsigned long)af);
	if (!proc)
		goto out_remove_matches;
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
#endif

	return 0;

#ifdef CONFIG_PROC_FS
out_remove_matches:
	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);

out_remove_tables:
	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TABLES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);
out:
	return -1;
#endif
}
EXPORT_SYMBOL_GPL(xt_proto_init);

void xt_proto_fini(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
	char buf[XT_FUNCTION_MAXNAMELEN];

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TABLES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);

	strlcpy(buf, xt_prefix[af], sizeof(buf));
	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
	remove_proc_entry(buf, net->proc_net);
#endif /* CONFIG_PROC_FS */
}
EXPORT_SYMBOL_GPL(xt_proto_fini);

/**
 * xt_percpu_counter_alloc - allocate x_tables rule counter
 *
 * @state: pointer to xt_percpu allocation state
 * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
 *
 * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
 * contain the address of the real (percpu) counter.
 *
 * Rule evaluation needs to use the xt_get_this_cpu_counter() helper
 * to fetch the real percpu counter.
 *
 * To speed up allocation and improve data locality, a 4 KiB block is
 * allocated.  Freeing any counter may free an entire block, so all
 * counters allocated using the same state must be freed at the same
 * time.
 *
 * xt_percpu_counter_alloc_state contains the base address of the
 * allocated page and the current sub-offset.
 *
 * Returns false on error.
 */
bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
			     struct xt_counters *counter)
{
	BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));

	if (nr_cpu_ids <= 1)
		return true;

	if (!state->mem) {
		state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
					    XT_PCPU_BLOCK_SIZE);
		if (!state->mem)
			return false;
	}
	counter->pcnt = (__force unsigned long)(state->mem + state->off);
	state->off += sizeof(*counter);
	if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
		state->mem = NULL;
		state->off = 0;
	}
	return true;
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);

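/*
 * Note: only the first counter carved out of each XT_PCPU_BLOCK_SIZE
 * block owns the percpu allocation, and only that counter's address is
 * block-aligned, so the mask test below frees each block exactly once.
 */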
void xt_percpu_counter_free(struct xt_counters *counters)
{
	unsigned long pcnt = counters->pcnt;

	if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
		free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);

static int __net_init xt_net_init(struct net *net)
{
	int i;

	for (i = 0; i < NFPROTO_NUMPROTO; i++)
		INIT_LIST_HEAD(&net->xt.tables[i]);
	return 0;
}

static void __net_exit xt_net_exit(struct net *net)
{
	int i;

	for (i = 0; i < NFPROTO_NUMPROTO; i++)
		WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
}

static struct pernet_operations xt_net_ops = {
	.init = xt_net_init,
	.exit = xt_net_exit,
};

static int __init xt_init(void)
{
	unsigned int i;
	int rv;

	for_each_possible_cpu(i) {
		seqcount_init(&per_cpu(xt_recseq, i));
	}

	xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
	if (!xt)
		return -ENOMEM;

	for (i = 0; i < NFPROTO_NUMPROTO; i++) {
		mutex_init(&xt[i].mutex);
#ifdef CONFIG_COMPAT
		mutex_init(&xt[i].compat_mutex);
		xt[i].compat_tab = NULL;
#endif
		INIT_LIST_HEAD(&xt[i].target);
		INIT_LIST_HEAD(&xt[i].match);
	}
	rv = register_pernet_subsys(&xt_net_ops);
	if (rv < 0)
		kfree(xt);
	return rv;
}

static void __exit xt_fini(void)
{
	unregister_pernet_subsys(&xt_net_ops);
	kfree(xt);
}

module_init(xt_init);
module_exit(xt_fini);