/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * This code is GPL.
 */
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/netfilter_ipv6.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#include "nf_internals.h"

const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);

#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

static DEFINE_MUTEX(nf_hook_mutex);

/* max hooks per family/hooknum */
#define MAX_HOOK_COUNT		1024

#define nf_entry_dereference(e) \
	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))

static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
	struct nf_hook_entries *e;
	size_t alloc = sizeof(*e) +
		       sizeof(struct nf_hook_entry) * num +
		       sizeof(struct nf_hook_ops *) * num +
		       sizeof(struct nf_hook_entries_rcu_head);

	if (num == 0)
		return NULL;

	e = kvzalloc(alloc, GFP_KERNEL);
	if (e)
		e->num_hook_entries = num;
	return e;
}

static void __nf_hook_entries_free(struct rcu_head *h)
{
	struct nf_hook_entries_rcu_head *head;

	head = container_of(h, struct nf_hook_entries_rcu_head, head);
	kvfree(head->allocation);
}

static void nf_hook_entries_free(struct nf_hook_entries *e)
{
	struct nf_hook_entries_rcu_head *head;
	struct nf_hook_ops **ops;
	unsigned int num;

	if (!e)
		return;

	num = e->num_hook_entries;
	ops = nf_hook_entries_get_hook_ops(e);
	head = (void *)&ops[num];
	head->allocation = e;
	call_rcu(&head->head, __nf_hook_entries_free);
}
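/* Layout sketch (editorial illustration; the authoritative definitions live
 * in include/linux/netfilter.h and nf_internals.h): allocate_hook_entries_size()
 * packs everything the blob needs into a single kvzalloc'd region, which is
 * why nf_hook_entries_free() can recover the rcu head from the slot just past
 * the ops array:
 *
 *	struct nf_hook_entries            header (num_hook_entries, ...)
 *	struct nf_hook_entry  hooks[num]  hook fn + priv, walked by nf_hook_slow()
 *	struct nf_hook_ops   *ops[num]    original ops, only touched under nf_hook_mutex
 *	struct nf_hook_entries_rcu_head   allocation back-pointer for call_rcu()
 */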
static unsigned int accept_all(void *priv,
			       struct sk_buff *skb,
			       const struct nf_hook_state *state)
{
	return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
}

static const struct nf_hook_ops dummy_ops = {
	.hook = accept_all,
	.priority = INT_MIN,
};

static struct nf_hook_entries *
nf_hook_entries_grow(const struct nf_hook_entries *old,
		     const struct nf_hook_ops *reg)
{
	unsigned int i, alloc_entries, nhooks, old_entries;
	struct nf_hook_ops **orig_ops = NULL;
	struct nf_hook_ops **new_ops;
	struct nf_hook_entries *new;
	bool inserted = false;

	alloc_entries = 1;
	old_entries = old ? old->num_hook_entries : 0;

	if (old) {
		orig_ops = nf_hook_entries_get_hook_ops(old);

		for (i = 0; i < old_entries; i++) {
			if (orig_ops[i] != &dummy_ops)
				alloc_entries++;
		}
	}

	if (alloc_entries > MAX_HOOK_COUNT)
		return ERR_PTR(-E2BIG);

	new = allocate_hook_entries_size(alloc_entries);
	if (!new)
		return ERR_PTR(-ENOMEM);

	new_ops = nf_hook_entries_get_hook_ops(new);

	i = 0;
	nhooks = 0;
	while (i < old_entries) {
		if (orig_ops[i] == &dummy_ops) {
			++i;
			continue;
		}

		if (inserted || reg->priority > orig_ops[i]->priority) {
			new_ops[nhooks] = (void *)orig_ops[i];
			new->hooks[nhooks] = old->hooks[i];
			i++;
		} else {
			new_ops[nhooks] = (void *)reg;
			new->hooks[nhooks].hook = reg->hook;
			new->hooks[nhooks].priv = reg->priv;
			inserted = true;
		}
		nhooks++;
	}

	if (!inserted) {
		new_ops[nhooks] = (void *)reg;
		new->hooks[nhooks].hook = reg->hook;
		new->hooks[nhooks].priv = reg->priv;
	}

	return new;
}

static void hooks_validate(const struct nf_hook_entries *hooks)
{
#ifdef CONFIG_DEBUG_KERNEL
	struct nf_hook_ops **orig_ops;
	int prio = INT_MIN;
	size_t i = 0;

	orig_ops = nf_hook_entries_get_hook_ops(hooks);

	for (i = 0; i < hooks->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			continue;

		WARN_ON(orig_ops[i]->priority < prio);

		if (orig_ops[i]->priority > prio)
			prio = orig_ops[i]->priority;
	}
#endif
}

int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp,
			       const struct nf_hook_ops *reg)
{
	struct nf_hook_entries *new_hooks;
	struct nf_hook_entries *p;

	p = rcu_dereference_raw(*pp);
	new_hooks = nf_hook_entries_grow(p, reg);
	if (IS_ERR(new_hooks))
		return PTR_ERR(new_hooks);

	hooks_validate(new_hooks);

	rcu_assign_pointer(*pp, new_hooks);

	BUG_ON(p == new_hooks);
	nf_hook_entries_free(p);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_hook_entries_insert_raw);
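/* Ordering example (editorial illustration, not from the original source):
 * nf_hook_entries_grow() keeps the blob sorted by ascending ->priority and
 * drops any dummy placeholders while copying.  Growing a blob whose live
 * hooks have priorities { -300, 0, 100 } with a new hook of priority 50
 * yields { -300, 0, 50, 100 }; a new hook whose priority equals an existing
 * one is placed in front of it, since copying only continues while
 * reg->priority is strictly greater than the existing entry's priority.
 */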
/*
 * __nf_hook_entries_try_shrink - try to shrink hook array
 *
 * @old -- current hook blob at @pp
 * @pp -- location of hook blob
 *
 * Hook unregistration must always succeed, so to-be-removed hooks
 * are replaced by a dummy one that will just move to next hook.
 *
 * This counts the current dummy hooks, attempts to allocate new blob,
 * copies the live hooks, then replaces and discards old one.
 *
 * return values:
 *
 * Returns address to free, or NULL.
 */
static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
					  struct nf_hook_entries __rcu **pp)
{
	unsigned int i, j, skip = 0, hook_entries;
	struct nf_hook_entries *new = NULL;
	struct nf_hook_ops **orig_ops;
	struct nf_hook_ops **new_ops;

	if (WARN_ON_ONCE(!old))
		return NULL;

	orig_ops = nf_hook_entries_get_hook_ops(old);
	for (i = 0; i < old->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			skip++;
	}

	/* if skip == hook_entries all hooks have been removed */
	hook_entries = old->num_hook_entries;
	if (skip == hook_entries)
		goto out_assign;

	if (skip == 0)
		return NULL;

	hook_entries -= skip;
	new = allocate_hook_entries_size(hook_entries);
	if (!new)
		return NULL;

	new_ops = nf_hook_entries_get_hook_ops(new);
	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			continue;
		new->hooks[j] = old->hooks[i];
		new_ops[j] = (void *)orig_ops[i];
		j++;
	}
	hooks_validate(new);
out_assign:
	rcu_assign_pointer(*pp, new);
	return old;
}
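/* Shrink example (editorial illustration): after two unregistrations a blob
 * may look like { A, dummy, B, dummy }.  skip == 2, so a two-entry blob
 * { A, B } is published via rcu_assign_pointer() and the old blob is handed
 * back to the caller for nf_hook_entries_free().  If every slot is a dummy,
 * NULL is published instead; if the new allocation fails, the oversized blob
 * simply stays in place and nothing is returned to free.
 */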
static struct nf_hook_entries __rcu **
nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
		   struct net_device *dev)
{
	switch (pf) {
	case NFPROTO_NETDEV:
		break;
#ifdef CONFIG_NETFILTER_FAMILY_ARP
	case NFPROTO_ARP:
		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
			return NULL;
		return net->nf.hooks_arp + hooknum;
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
	case NFPROTO_BRIDGE:
		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
			return NULL;
		return net->nf.hooks_bridge + hooknum;
#endif
	case NFPROTO_IPV4:
		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
			return NULL;
		return net->nf.hooks_ipv4 + hooknum;
	case NFPROTO_IPV6:
		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
			return NULL;
		return net->nf.hooks_ipv6 + hooknum;
#if IS_ENABLED(CONFIG_DECNET)
	case NFPROTO_DECNET:
		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
			return NULL;
		return net->nf.hooks_decnet + hooknum;
#endif
	default:
		WARN_ON_ONCE(1);
		return NULL;
	}

#ifdef CONFIG_NETFILTER_INGRESS
	if (hooknum == NF_NETDEV_INGRESS) {
		if (dev && dev_net(dev) == net)
			return &dev->nf_hooks_ingress;
	}
#endif
	WARN_ON_ONCE(1);
	return NULL;
}

static int __nf_register_net_hook(struct net *net, int pf,
				  const struct nf_hook_ops *reg)
{
	struct nf_hook_entries *p, *new_hooks;
	struct nf_hook_entries __rcu **pp;

	if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
		if (reg->hooknum == NF_NETDEV_INGRESS)
			return -EOPNOTSUPP;
#endif
		if (reg->hooknum != NF_NETDEV_INGRESS ||
		    !reg->dev || dev_net(reg->dev) != net)
			return -EINVAL;
	}

	pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
	if (!pp)
		return -EINVAL;

	mutex_lock(&nf_hook_mutex);

	p = nf_entry_dereference(*pp);
	new_hooks = nf_hook_entries_grow(p, reg);

	if (!IS_ERR(new_hooks))
		rcu_assign_pointer(*pp, new_hooks);

	mutex_unlock(&nf_hook_mutex);
	if (IS_ERR(new_hooks))
		return PTR_ERR(new_hooks);

	hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
	if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
		net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
	static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
	BUG_ON(p == new_hooks);
	nf_hook_entries_free(p);
	return 0;
}

/*
 * nf_remove_net_hook - remove a hook from blob
 *
 * @oldp: current address of hook blob
 * @unreg: hook to unregister
 *
 * This cannot fail, hook unregistration must always succeed.
 * Therefore replace the to-be-removed hook with a dummy hook.
 */
static bool nf_remove_net_hook(struct nf_hook_entries *old,
			       const struct nf_hook_ops *unreg)
{
	struct nf_hook_ops **orig_ops;
	unsigned int i;

	orig_ops = nf_hook_entries_get_hook_ops(old);
	for (i = 0; i < old->num_hook_entries; i++) {
		if (orig_ops[i] != unreg)
			continue;
		WRITE_ONCE(old->hooks[i].hook, accept_all);
		WRITE_ONCE(orig_ops[i], &dummy_ops);
		return true;
	}

	return false;
}

static void __nf_unregister_net_hook(struct net *net, int pf,
				     const struct nf_hook_ops *reg)
{
	struct nf_hook_entries __rcu **pp;
	struct nf_hook_entries *p;

	pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
	if (!pp)
		return;

	mutex_lock(&nf_hook_mutex);

	p = nf_entry_dereference(*pp);
	if (WARN_ON_ONCE(!p)) {
		mutex_unlock(&nf_hook_mutex);
		return;
	}

	if (nf_remove_net_hook(p, reg)) {
#ifdef CONFIG_NETFILTER_INGRESS
		if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
			net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
		static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
#endif
	} else {
		WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum);
	}

	p = __nf_hook_entries_try_shrink(p, pp);
	mutex_unlock(&nf_hook_mutex);
	if (!p)
		return;

	nf_queue_nf_hook_drop(net);
	nf_hook_entries_free(p);
}

void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
	if (reg->pf == NFPROTO_INET) {
		__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
		__nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
	} else {
		__nf_unregister_net_hook(net, reg->pf, reg);
	}
}
EXPORT_SYMBOL(nf_unregister_net_hook);

void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
				const struct nf_hook_ops *reg)
{
	struct nf_hook_entries *p;

	p = rcu_dereference_raw(*pp);
	if (nf_remove_net_hook(p, reg)) {
		p = __nf_hook_entries_try_shrink(p, pp);
		nf_hook_entries_free(p);
	}
}
EXPORT_SYMBOL_GPL(nf_hook_entries_delete_raw);

int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
	int err;

	if (reg->pf == NFPROTO_INET) {
		err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
		if (err < 0)
			return err;

		err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
		if (err < 0) {
			__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
			return err;
		}
	} else {
		err = __nf_register_net_hook(net, reg->pf, reg);
		if (err < 0)
			return err;
	}

	return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);

int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			  unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = nf_register_net_hook(net, &reg[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		nf_unregister_net_hooks(net, reg, i);
	return err;
}
EXPORT_SYMBOL(nf_register_net_hooks);
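/* Usage sketch (editorial illustration; the names below are made up and a
 * real caller would live in its own module, not in this file):
 *
 *	static unsigned int sample_hookfn(void *priv, struct sk_buff *skb,
 *					  const struct nf_hook_state *state)
 *	{
 *		return NF_ACCEPT;
 *	}
 *
 *	static const struct nf_hook_ops sample_ops = {
 *		.hook     = sample_hookfn,
 *		.pf       = NFPROTO_IPV4,
 *		.hooknum  = NF_INET_LOCAL_IN,
 *		.priority = NF_IP_PRI_FILTER,
 *	};
 *
 *	err = nf_register_net_hook(&init_net, &sample_ops);
 *
 * Passing .pf = NFPROTO_INET registers the same ops on both the IPv4 and
 * IPv6 chains, and nf_unregister_net_hook() undoes either variant.
 */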
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			     unsigned int hookcount)
{
	unsigned int i;

	for (i = 0; i < hookcount; i++)
		nf_unregister_net_hook(net, &reg[i]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
		 const struct nf_hook_entries *e, unsigned int s)
{
	unsigned int verdict;
	int ret;

	for (; s < e->num_hook_entries; s++) {
		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
		switch (verdict & NF_VERDICT_MASK) {
		case NF_ACCEPT:
			break;
		case NF_DROP:
			kfree_skb(skb);
			ret = NF_DROP_GETERR(verdict);
			if (ret == 0)
				ret = -EPERM;
			return ret;
		case NF_QUEUE:
			ret = nf_queue(skb, state, e, s, verdict);
			if (ret == 1)
				continue;
			return ret;
		default:
			/* Implicit handling for NF_STOLEN, as well as any other
			 * non conventional verdicts.
			 */
			return 0;
		}
	}

	return 1;
}
EXPORT_SYMBOL(nf_hook_slow);
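/* Caller-side sketch (editorial illustration of the return contract above,
 * roughly what the NF_HOOK() path in include/linux/netfilter.h does):
 *
 *	rcu_read_lock();
 *	ret = nf_hook_slow(skb, &state, hook_head, 0);
 *	rcu_read_unlock();
 *	if (ret == 1)
 *		ret = okfn(net, sk, skb);	// every hook said NF_ACCEPT
 *	// ret == 0: skb was stolen or queued, nothing more to do here
 *	// ret <  0: skb was dropped and already freed, ret is the errno
 */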
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
	if (writable_len > skb->len)
		return 0;

	/* Not exclusive use of packet?  Must copy. */
	if (!skb_cloned(skb)) {
		if (writable_len <= skb_headlen(skb))
			return 1;
	} else if (skb_clone_writable(skb, writable_len))
		return 1;

	if (writable_len <= skb_headlen(skb))
		writable_len = 0;
	else
		writable_len -= skb_headlen(skb);

	return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);

/* This needs to be compiled in any case to avoid dependencies between the
 * nfnetlink_queue code and nf_conntrack.
 */
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);

struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
   tracking in use: without this, connection may not be in hash table, and hence
   manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
		__rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);

struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);

void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
	void (*attach)(struct sk_buff *, const struct sk_buff *);

	if (skb->_nfct) {
		rcu_read_lock();
		attach = rcu_dereference(ip_ct_attach);
		if (attach)
			attach(new, skb);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(nf_ct_attach);

void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
	struct nf_ct_hook *ct_hook;

	rcu_read_lock();
	ct_hook = rcu_dereference(nf_ct_hook);
	BUG_ON(ct_hook == NULL);
	ct_hook->destroy(nfct);
	rcu_read_unlock();
}
EXPORT_SYMBOL(nf_conntrack_destroy);

bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
			 const struct sk_buff *skb)
{
	struct nf_ct_hook *ct_hook;
	bool ret = false;

	rcu_read_lock();
	ct_hook = rcu_dereference(nf_ct_hook);
	if (ct_hook)
		ret = ct_hook->get_tuple_skb(dst_tuple, skb);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(nf_ct_get_tuple_skb);

/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
	.id	= NF_CT_DEFAULT_ZONE_ID,
	.dir	= NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
#endif /* CONFIG_NF_CONNTRACK */

static void __net_init
__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
{
	int h;

	for (h = 0; h < max; h++)
		RCU_INIT_POINTER(e[h], NULL);
}

static int __net_init netfilter_net_init(struct net *net)
{
	__netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
	__netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
#ifdef CONFIG_NETFILTER_FAMILY_ARP
	__netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
	__netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
#endif
#if IS_ENABLED(CONFIG_DECNET)
	__netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
#endif

#ifdef CONFIG_PROC_FS
	net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
						net->proc_net);
	if (!net->nf.proc_netfilter) {
		if (!net_eq(net, &init_net))
			pr_err("cannot create netfilter proc entry");

		return -ENOMEM;
	}
#endif

	return 0;
}

static void __net_exit netfilter_net_exit(struct net *net)
{
	remove_proc_entry("netfilter", net->proc_net);
}

static struct pernet_operations netfilter_net_ops = {
	.init = netfilter_net_init,
	.exit = netfilter_net_exit,
};

int __init netfilter_init(void)
{
	int ret;

	ret = register_pernet_subsys(&netfilter_net_ops);
	if (ret < 0)
		goto err;

	ret = netfilter_log_init();
	if (ret < 0)
		goto err_pernet;

	return 0;
err_pernet:
	unregister_pernet_subsys(&netfilter_net_ops);
err:
	return ret;
}