1 /* netfilter.c: look after the filters for various protocols. 2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. 3 * 4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any 5 * way. 6 * 7 * This code is GPL. 8 */ 9 #include <linux/kernel.h> 10 #include <linux/netfilter.h> 11 #include <net/protocol.h> 12 #include <linux/init.h> 13 #include <linux/skbuff.h> 14 #include <linux/wait.h> 15 #include <linux/module.h> 16 #include <linux/interrupt.h> 17 #include <linux/if.h> 18 #include <linux/netdevice.h> 19 #include <linux/netfilter_ipv6.h> 20 #include <linux/inetdevice.h> 21 #include <linux/proc_fs.h> 22 #include <linux/mutex.h> 23 #include <linux/mm.h> 24 #include <linux/rcupdate.h> 25 #include <net/net_namespace.h> 26 #include <net/sock.h> 27 28 #include "nf_internals.h" 29 30 const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; 31 EXPORT_SYMBOL_GPL(nf_ipv6_ops); 32 33 DEFINE_PER_CPU(bool, nf_skb_duplicated); 34 EXPORT_SYMBOL_GPL(nf_skb_duplicated); 35 36 #ifdef HAVE_JUMP_LABEL 37 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; 38 EXPORT_SYMBOL(nf_hooks_needed); 39 #endif 40 41 static DEFINE_MUTEX(nf_hook_mutex); 42 43 /* max hooks per family/hooknum */ 44 #define MAX_HOOK_COUNT 1024 45 46 #define nf_entry_dereference(e) \ 47 rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) 48 49 static struct nf_hook_entries *allocate_hook_entries_size(u16 num) 50 { 51 struct nf_hook_entries *e; 52 size_t alloc = sizeof(*e) + 53 sizeof(struct nf_hook_entry) * num + 54 sizeof(struct nf_hook_ops *) * num + 55 sizeof(struct nf_hook_entries_rcu_head); 56 57 if (num == 0) 58 return NULL; 59 60 e = kvzalloc(alloc, GFP_KERNEL); 61 if (e) 62 e->num_hook_entries = num; 63 return e; 64 } 65 66 static void __nf_hook_entries_free(struct rcu_head *h) 67 { 68 struct nf_hook_entries_rcu_head *head; 69 70 head = container_of(h, struct nf_hook_entries_rcu_head, head); 71 kvfree(head->allocation); 72 } 73 74 static void nf_hook_entries_free(struct nf_hook_entries *e) 75 { 76 struct nf_hook_entries_rcu_head *head; 77 struct nf_hook_ops **ops; 78 unsigned int num; 79 80 if (!e) 81 return; 82 83 num = e->num_hook_entries; 84 ops = nf_hook_entries_get_hook_ops(e); 85 head = (void *)&ops[num]; 86 head->allocation = e; 87 call_rcu(&head->head, __nf_hook_entries_free); 88 } 89 90 static unsigned int accept_all(void *priv, 91 struct sk_buff *skb, 92 const struct nf_hook_state *state) 93 { 94 return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */ 95 } 96 97 static const struct nf_hook_ops dummy_ops = { 98 .hook = accept_all, 99 .priority = INT_MIN, 100 }; 101 102 static struct nf_hook_entries * 103 nf_hook_entries_grow(const struct nf_hook_entries *old, 104 const struct nf_hook_ops *reg) 105 { 106 unsigned int i, alloc_entries, nhooks, old_entries; 107 struct nf_hook_ops **orig_ops = NULL; 108 struct nf_hook_ops **new_ops; 109 struct nf_hook_entries *new; 110 bool inserted = false; 111 112 alloc_entries = 1; 113 old_entries = old ? old->num_hook_entries : 0; 114 115 if (old) { 116 orig_ops = nf_hook_entries_get_hook_ops(old); 117 118 for (i = 0; i < old_entries; i++) { 119 if (orig_ops[i] != &dummy_ops) 120 alloc_entries++; 121 } 122 } 123 124 if (alloc_entries > MAX_HOOK_COUNT) 125 return ERR_PTR(-E2BIG); 126 127 new = allocate_hook_entries_size(alloc_entries); 128 if (!new) 129 return ERR_PTR(-ENOMEM); 130 131 new_ops = nf_hook_entries_get_hook_ops(new); 132 133 i = 0; 134 nhooks = 0; 135 while (i < old_entries) { 136 if (orig_ops[i] == &dummy_ops) { 137 ++i; 138 continue; 139 } 140 141 if (reg->nat_hook && orig_ops[i]->nat_hook) { 142 kvfree(new); 143 return ERR_PTR(-EBUSY); 144 } 145 146 if (inserted || reg->priority > orig_ops[i]->priority) { 147 new_ops[nhooks] = (void *)orig_ops[i]; 148 new->hooks[nhooks] = old->hooks[i]; 149 i++; 150 } else { 151 new_ops[nhooks] = (void *)reg; 152 new->hooks[nhooks].hook = reg->hook; 153 new->hooks[nhooks].priv = reg->priv; 154 inserted = true; 155 } 156 nhooks++; 157 } 158 159 if (!inserted) { 160 new_ops[nhooks] = (void *)reg; 161 new->hooks[nhooks].hook = reg->hook; 162 new->hooks[nhooks].priv = reg->priv; 163 } 164 165 return new; 166 } 167 168 static void hooks_validate(const struct nf_hook_entries *hooks) 169 { 170 #ifdef CONFIG_DEBUG_KERNEL 171 struct nf_hook_ops **orig_ops; 172 int prio = INT_MIN; 173 size_t i = 0; 174 175 orig_ops = nf_hook_entries_get_hook_ops(hooks); 176 177 for (i = 0; i < hooks->num_hook_entries; i++) { 178 if (orig_ops[i] == &dummy_ops) 179 continue; 180 181 WARN_ON(orig_ops[i]->priority < prio); 182 183 if (orig_ops[i]->priority > prio) 184 prio = orig_ops[i]->priority; 185 } 186 #endif 187 } 188 189 /* 190 * __nf_hook_entries_try_shrink - try to shrink hook array 191 * 192 * @pp -- location of hook blob 193 * 194 * Hook unregistration must always succeed, so to-be-removed hooks 195 * are replaced by a dummy one that will just move to next hook. 196 * 197 * This counts the current dummy hooks, attempts to allocate new blob, 198 * copies the live hooks, then replaces and discards old one. 199 * 200 * return values: 201 * 202 * Returns address to free, or NULL. 203 */ 204 static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp) 205 { 206 struct nf_hook_entries *old, *new = NULL; 207 unsigned int i, j, skip = 0, hook_entries; 208 struct nf_hook_ops **orig_ops; 209 struct nf_hook_ops **new_ops; 210 211 old = nf_entry_dereference(*pp); 212 if (WARN_ON_ONCE(!old)) 213 return NULL; 214 215 orig_ops = nf_hook_entries_get_hook_ops(old); 216 for (i = 0; i < old->num_hook_entries; i++) { 217 if (orig_ops[i] == &dummy_ops) 218 skip++; 219 } 220 221 /* if skip == hook_entries all hooks have been removed */ 222 hook_entries = old->num_hook_entries; 223 if (skip == hook_entries) 224 goto out_assign; 225 226 if (skip == 0) 227 return NULL; 228 229 hook_entries -= skip; 230 new = allocate_hook_entries_size(hook_entries); 231 if (!new) 232 return NULL; 233 234 new_ops = nf_hook_entries_get_hook_ops(new); 235 for (i = 0, j = 0; i < old->num_hook_entries; i++) { 236 if (orig_ops[i] == &dummy_ops) 237 continue; 238 new->hooks[j] = old->hooks[i]; 239 new_ops[j] = (void *)orig_ops[i]; 240 j++; 241 } 242 hooks_validate(new); 243 out_assign: 244 rcu_assign_pointer(*pp, new); 245 return old; 246 } 247 248 static struct nf_hook_entries __rcu ** 249 nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, 250 struct net_device *dev) 251 { 252 switch (pf) { 253 case NFPROTO_NETDEV: 254 break; 255 #ifdef CONFIG_NETFILTER_FAMILY_ARP 256 case NFPROTO_ARP: 257 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum)) 258 return NULL; 259 return net->nf.hooks_arp + hooknum; 260 #endif 261 #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE 262 case NFPROTO_BRIDGE: 263 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum)) 264 return NULL; 265 return net->nf.hooks_bridge + hooknum; 266 #endif 267 case NFPROTO_IPV4: 268 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum)) 269 return NULL; 270 return net->nf.hooks_ipv4 + hooknum; 271 case NFPROTO_IPV6: 272 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum)) 273 return NULL; 274 return net->nf.hooks_ipv6 + hooknum; 275 #if IS_ENABLED(CONFIG_DECNET) 276 case NFPROTO_DECNET: 277 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum)) 278 return NULL; 279 return net->nf.hooks_decnet + hooknum; 280 #endif 281 default: 282 WARN_ON_ONCE(1); 283 return NULL; 284 } 285 286 #ifdef CONFIG_NETFILTER_INGRESS 287 if (hooknum == NF_NETDEV_INGRESS) { 288 if (dev && dev_net(dev) == net) 289 return &dev->nf_hooks_ingress; 290 } 291 #endif 292 WARN_ON_ONCE(1); 293 return NULL; 294 } 295 296 static int __nf_register_net_hook(struct net *net, int pf, 297 const struct nf_hook_ops *reg) 298 { 299 struct nf_hook_entries *p, *new_hooks; 300 struct nf_hook_entries __rcu **pp; 301 302 if (pf == NFPROTO_NETDEV) { 303 #ifndef CONFIG_NETFILTER_INGRESS 304 if (reg->hooknum == NF_NETDEV_INGRESS) 305 return -EOPNOTSUPP; 306 #endif 307 if (reg->hooknum != NF_NETDEV_INGRESS || 308 !reg->dev || dev_net(reg->dev) != net) 309 return -EINVAL; 310 } 311 312 pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev); 313 if (!pp) 314 return -EINVAL; 315 316 mutex_lock(&nf_hook_mutex); 317 318 p = nf_entry_dereference(*pp); 319 new_hooks = nf_hook_entries_grow(p, reg); 320 321 if (!IS_ERR(new_hooks)) 322 rcu_assign_pointer(*pp, new_hooks); 323 324 mutex_unlock(&nf_hook_mutex); 325 if (IS_ERR(new_hooks)) 326 return PTR_ERR(new_hooks); 327 328 hooks_validate(new_hooks); 329 #ifdef CONFIG_NETFILTER_INGRESS 330 if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) 331 net_inc_ingress_queue(); 332 #endif 333 #ifdef HAVE_JUMP_LABEL 334 static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]); 335 #endif 336 BUG_ON(p == new_hooks); 337 nf_hook_entries_free(p); 338 return 0; 339 } 340 341 /* 342 * nf_remove_net_hook - remove a hook from blob 343 * 344 * @oldp: current address of hook blob 345 * @unreg: hook to unregister 346 * 347 * This cannot fail, hook unregistration must always succeed. 348 * Therefore replace the to-be-removed hook with a dummy hook. 349 */ 350 static void nf_remove_net_hook(struct nf_hook_entries *old, 351 const struct nf_hook_ops *unreg, int pf) 352 { 353 struct nf_hook_ops **orig_ops; 354 bool found = false; 355 unsigned int i; 356 357 orig_ops = nf_hook_entries_get_hook_ops(old); 358 for (i = 0; i < old->num_hook_entries; i++) { 359 if (orig_ops[i] != unreg) 360 continue; 361 WRITE_ONCE(old->hooks[i].hook, accept_all); 362 WRITE_ONCE(orig_ops[i], &dummy_ops); 363 found = true; 364 break; 365 } 366 367 if (found) { 368 #ifdef CONFIG_NETFILTER_INGRESS 369 if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS) 370 net_dec_ingress_queue(); 371 #endif 372 #ifdef HAVE_JUMP_LABEL 373 static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]); 374 #endif 375 } else { 376 WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum); 377 } 378 } 379 380 static void __nf_unregister_net_hook(struct net *net, int pf, 381 const struct nf_hook_ops *reg) 382 { 383 struct nf_hook_entries __rcu **pp; 384 struct nf_hook_entries *p; 385 386 pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev); 387 if (!pp) 388 return; 389 390 mutex_lock(&nf_hook_mutex); 391 392 p = nf_entry_dereference(*pp); 393 if (WARN_ON_ONCE(!p)) { 394 mutex_unlock(&nf_hook_mutex); 395 return; 396 } 397 398 nf_remove_net_hook(p, reg, pf); 399 400 p = __nf_hook_entries_try_shrink(pp); 401 mutex_unlock(&nf_hook_mutex); 402 if (!p) 403 return; 404 405 nf_queue_nf_hook_drop(net); 406 nf_hook_entries_free(p); 407 } 408 409 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) 410 { 411 if (reg->pf == NFPROTO_INET) { 412 __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); 413 __nf_unregister_net_hook(net, NFPROTO_IPV6, reg); 414 } else { 415 __nf_unregister_net_hook(net, reg->pf, reg); 416 } 417 } 418 EXPORT_SYMBOL(nf_unregister_net_hook); 419 420 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) 421 { 422 int err; 423 424 if (reg->pf == NFPROTO_INET) { 425 err = __nf_register_net_hook(net, NFPROTO_IPV4, reg); 426 if (err < 0) 427 return err; 428 429 err = __nf_register_net_hook(net, NFPROTO_IPV6, reg); 430 if (err < 0) { 431 __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); 432 return err; 433 } 434 } else { 435 err = __nf_register_net_hook(net, reg->pf, reg); 436 if (err < 0) 437 return err; 438 } 439 440 return 0; 441 } 442 EXPORT_SYMBOL(nf_register_net_hook); 443 444 int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, 445 unsigned int n) 446 { 447 unsigned int i; 448 int err = 0; 449 450 for (i = 0; i < n; i++) { 451 err = nf_register_net_hook(net, ®[i]); 452 if (err) 453 goto err; 454 } 455 return err; 456 457 err: 458 if (i > 0) 459 nf_unregister_net_hooks(net, reg, i); 460 return err; 461 } 462 EXPORT_SYMBOL(nf_register_net_hooks); 463 464 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, 465 unsigned int hookcount) 466 { 467 unsigned int i; 468 469 for (i = 0; i < hookcount; i++) 470 nf_unregister_net_hook(net, ®[i]); 471 } 472 EXPORT_SYMBOL(nf_unregister_net_hooks); 473 474 /* Returns 1 if okfn() needs to be executed by the caller, 475 * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ 476 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, 477 const struct nf_hook_entries *e, unsigned int s) 478 { 479 unsigned int verdict; 480 int ret; 481 482 for (; s < e->num_hook_entries; s++) { 483 verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state); 484 switch (verdict & NF_VERDICT_MASK) { 485 case NF_ACCEPT: 486 break; 487 case NF_DROP: 488 kfree_skb(skb); 489 ret = NF_DROP_GETERR(verdict); 490 if (ret == 0) 491 ret = -EPERM; 492 return ret; 493 case NF_QUEUE: 494 ret = nf_queue(skb, state, e, s, verdict); 495 if (ret == 1) 496 continue; 497 return ret; 498 default: 499 /* Implicit handling for NF_STOLEN, as well as any other 500 * non conventional verdicts. 501 */ 502 return 0; 503 } 504 } 505 506 return 1; 507 } 508 EXPORT_SYMBOL(nf_hook_slow); 509 510 511 int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) 512 { 513 if (writable_len > skb->len) 514 return 0; 515 516 /* Not exclusive use of packet? Must copy. */ 517 if (!skb_cloned(skb)) { 518 if (writable_len <= skb_headlen(skb)) 519 return 1; 520 } else if (skb_clone_writable(skb, writable_len)) 521 return 1; 522 523 if (writable_len <= skb_headlen(skb)) 524 writable_len = 0; 525 else 526 writable_len -= skb_headlen(skb); 527 528 return !!__pskb_pull_tail(skb, writable_len); 529 } 530 EXPORT_SYMBOL(skb_make_writable); 531 532 /* This needs to be compiled in any case to avoid dependencies between the 533 * nfnetlink_queue code and nf_conntrack. 534 */ 535 struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; 536 EXPORT_SYMBOL_GPL(nfnl_ct_hook); 537 538 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 539 /* This does not belong here, but locally generated errors need it if connection 540 tracking in use: without this, connection may not be in hash table, and hence 541 manufactured ICMP or RST packets will not be associated with it. */ 542 void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) 543 __rcu __read_mostly; 544 EXPORT_SYMBOL(ip_ct_attach); 545 546 void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) 547 { 548 void (*attach)(struct sk_buff *, const struct sk_buff *); 549 550 if (skb->_nfct) { 551 rcu_read_lock(); 552 attach = rcu_dereference(ip_ct_attach); 553 if (attach) 554 attach(new, skb); 555 rcu_read_unlock(); 556 } 557 } 558 EXPORT_SYMBOL(nf_ct_attach); 559 560 void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; 561 EXPORT_SYMBOL(nf_ct_destroy); 562 563 void nf_conntrack_destroy(struct nf_conntrack *nfct) 564 { 565 void (*destroy)(struct nf_conntrack *); 566 567 rcu_read_lock(); 568 destroy = rcu_dereference(nf_ct_destroy); 569 BUG_ON(destroy == NULL); 570 destroy(nfct); 571 rcu_read_unlock(); 572 } 573 EXPORT_SYMBOL(nf_conntrack_destroy); 574 575 /* Built-in default zone used e.g. by modules. */ 576 const struct nf_conntrack_zone nf_ct_zone_dflt = { 577 .id = NF_CT_DEFAULT_ZONE_ID, 578 .dir = NF_CT_DEFAULT_ZONE_DIR, 579 }; 580 EXPORT_SYMBOL_GPL(nf_ct_zone_dflt); 581 #endif /* CONFIG_NF_CONNTRACK */ 582 583 #ifdef CONFIG_NF_NAT_NEEDED 584 void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); 585 EXPORT_SYMBOL(nf_nat_decode_session_hook); 586 #endif 587 588 static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max) 589 { 590 int h; 591 592 for (h = 0; h < max; h++) 593 RCU_INIT_POINTER(e[h], NULL); 594 } 595 596 static int __net_init netfilter_net_init(struct net *net) 597 { 598 __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4)); 599 __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6)); 600 #ifdef CONFIG_NETFILTER_FAMILY_ARP 601 __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp)); 602 #endif 603 #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE 604 __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); 605 #endif 606 #if IS_ENABLED(CONFIG_DECNET) 607 __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); 608 #endif 609 610 #ifdef CONFIG_PROC_FS 611 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", 612 net->proc_net); 613 if (!net->nf.proc_netfilter) { 614 if (!net_eq(net, &init_net)) 615 pr_err("cannot create netfilter proc entry"); 616 617 return -ENOMEM; 618 } 619 #endif 620 621 return 0; 622 } 623 624 static void __net_exit netfilter_net_exit(struct net *net) 625 { 626 remove_proc_entry("netfilter", net->proc_net); 627 } 628 629 static struct pernet_operations netfilter_net_ops = { 630 .init = netfilter_net_init, 631 .exit = netfilter_net_exit, 632 }; 633 634 int __init netfilter_init(void) 635 { 636 int ret; 637 638 ret = register_pernet_subsys(&netfilter_net_ops); 639 if (ret < 0) 640 goto err; 641 642 ret = netfilter_log_init(); 643 if (ret < 0) 644 goto err_pernet; 645 646 return 0; 647 err_pernet: 648 unregister_pernet_subsys(&netfilter_net_ops); 649 err: 650 return ret; 651 } 652