1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NETLINK Kernel-user communication protocol. 4 * 5 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 7 * Patrick McHardy <kaber@trash.net> 8 * 9 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith 10 * added netlink_proto_exit 11 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 12 * use nlk_sk, as sk->protinfo is on a diet 8) 13 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> 14 * - inc module use count of module that owns 15 * the kernel socket in case userspace opens 16 * socket of same protocol 17 * - remove all module support, since netlink is 18 * mandatory if CONFIG_NET=y these days 19 */ 20 21 #include <linux/module.h> 22 23 #include <linux/capability.h> 24 #include <linux/kernel.h> 25 #include <linux/init.h> 26 #include <linux/signal.h> 27 #include <linux/sched.h> 28 #include <linux/errno.h> 29 #include <linux/string.h> 30 #include <linux/stat.h> 31 #include <linux/socket.h> 32 #include <linux/un.h> 33 #include <linux/fcntl.h> 34 #include <linux/termios.h> 35 #include <linux/sockios.h> 36 #include <linux/net.h> 37 #include <linux/fs.h> 38 #include <linux/slab.h> 39 #include <linux/uaccess.h> 40 #include <linux/skbuff.h> 41 #include <linux/netdevice.h> 42 #include <linux/rtnetlink.h> 43 #include <linux/proc_fs.h> 44 #include <linux/seq_file.h> 45 #include <linux/notifier.h> 46 #include <linux/security.h> 47 #include <linux/jhash.h> 48 #include <linux/jiffies.h> 49 #include <linux/random.h> 50 #include <linux/bitops.h> 51 #include <linux/mm.h> 52 #include <linux/types.h> 53 #include <linux/audit.h> 54 #include <linux/mutex.h> 55 #include <linux/vmalloc.h> 56 #include <linux/if_arp.h> 57 #include <linux/rhashtable.h> 58 #include <asm/cacheflush.h> 59 #include <linux/hash.h> 60 #include <linux/genetlink.h> 61 #include <linux/net_namespace.h> 62 #include <linux/nospec.h> 63 #include <linux/btf_ids.h> 64 65 #include <net/net_namespace.h> 66 #include <net/netns/generic.h> 67 #include <net/sock.h> 68 #include <net/scm.h> 69 #include <net/netlink.h> 70 71 #include "af_netlink.h" 72 73 struct listeners { 74 struct rcu_head rcu; 75 unsigned long masks[]; 76 }; 77 78 /* state bits */ 79 #define NETLINK_S_CONGESTED 0x0 80 81 static inline int netlink_is_kernel(struct sock *sk) 82 { 83 return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; 84 } 85 86 struct netlink_table *nl_table __read_mostly; 87 EXPORT_SYMBOL_GPL(nl_table); 88 89 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 90 91 static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; 92 93 static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { 94 "nlk_cb_mutex-ROUTE", 95 "nlk_cb_mutex-1", 96 "nlk_cb_mutex-USERSOCK", 97 "nlk_cb_mutex-FIREWALL", 98 "nlk_cb_mutex-SOCK_DIAG", 99 "nlk_cb_mutex-NFLOG", 100 "nlk_cb_mutex-XFRM", 101 "nlk_cb_mutex-SELINUX", 102 "nlk_cb_mutex-ISCSI", 103 "nlk_cb_mutex-AUDIT", 104 "nlk_cb_mutex-FIB_LOOKUP", 105 "nlk_cb_mutex-CONNECTOR", 106 "nlk_cb_mutex-NETFILTER", 107 "nlk_cb_mutex-IP6_FW", 108 "nlk_cb_mutex-DNRTMSG", 109 "nlk_cb_mutex-KOBJECT_UEVENT", 110 "nlk_cb_mutex-GENERIC", 111 "nlk_cb_mutex-17", 112 "nlk_cb_mutex-SCSITRANSPORT", 113 "nlk_cb_mutex-ECRYPTFS", 114 "nlk_cb_mutex-RDMA", 115 "nlk_cb_mutex-CRYPTO", 116 "nlk_cb_mutex-SMC", 117 "nlk_cb_mutex-23", 118 "nlk_cb_mutex-24", 119 "nlk_cb_mutex-25", 120 "nlk_cb_mutex-26", 121 "nlk_cb_mutex-27", 122 "nlk_cb_mutex-28", 123 "nlk_cb_mutex-29", 124 "nlk_cb_mutex-30", 125 "nlk_cb_mutex-31", 126 
"nlk_cb_mutex-MAX_LINKS" 127 }; 128 129 static int netlink_dump(struct sock *sk); 130 131 /* nl_table locking explained: 132 * Lookup and traversal are protected with an RCU read-side lock. Insertion 133 * and removal are protected with per bucket lock while using RCU list 134 * modification primitives and may run in parallel to RCU protected lookups. 135 * Destruction of the Netlink socket may only occur *after* nl_table_lock has 136 * been acquired * either during or after the socket has been removed from 137 * the list and after an RCU grace period. 138 */ 139 DEFINE_RWLOCK(nl_table_lock); 140 EXPORT_SYMBOL_GPL(nl_table_lock); 141 static atomic_t nl_table_users = ATOMIC_INIT(0); 142 143 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 144 145 static BLOCKING_NOTIFIER_HEAD(netlink_chain); 146 147 148 static const struct rhashtable_params netlink_rhashtable_params; 149 150 static inline u32 netlink_group_mask(u32 group) 151 { 152 return group ? 1 << (group - 1) : 0; 153 } 154 155 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, 156 gfp_t gfp_mask) 157 { 158 unsigned int len = skb_end_offset(skb); 159 struct sk_buff *new; 160 161 new = alloc_skb(len, gfp_mask); 162 if (new == NULL) 163 return NULL; 164 165 NETLINK_CB(new).portid = NETLINK_CB(skb).portid; 166 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; 167 NETLINK_CB(new).creds = NETLINK_CB(skb).creds; 168 169 skb_put_data(new, skb->data, len); 170 return new; 171 } 172 173 static unsigned int netlink_tap_net_id; 174 175 struct netlink_tap_net { 176 struct list_head netlink_tap_all; 177 struct mutex netlink_tap_lock; 178 }; 179 180 int netlink_add_tap(struct netlink_tap *nt) 181 { 182 struct net *net = dev_net(nt->dev); 183 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 184 185 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 186 return -EINVAL; 187 188 mutex_lock(&nn->netlink_tap_lock); 189 list_add_rcu(&nt->list, &nn->netlink_tap_all); 190 mutex_unlock(&nn->netlink_tap_lock); 191 192 __module_get(nt->module); 193 194 return 0; 195 } 196 EXPORT_SYMBOL_GPL(netlink_add_tap); 197 198 static int __netlink_remove_tap(struct netlink_tap *nt) 199 { 200 struct net *net = dev_net(nt->dev); 201 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 202 bool found = false; 203 struct netlink_tap *tmp; 204 205 mutex_lock(&nn->netlink_tap_lock); 206 207 list_for_each_entry(tmp, &nn->netlink_tap_all, list) { 208 if (nt == tmp) { 209 list_del_rcu(&nt->list); 210 found = true; 211 goto out; 212 } 213 } 214 215 pr_warn("__netlink_remove_tap: %p not found\n", nt); 216 out: 217 mutex_unlock(&nn->netlink_tap_lock); 218 219 if (found) 220 module_put(nt->module); 221 222 return found ? 
0 : -ENODEV; 223 } 224 225 int netlink_remove_tap(struct netlink_tap *nt) 226 { 227 int ret; 228 229 ret = __netlink_remove_tap(nt); 230 synchronize_net(); 231 232 return ret; 233 } 234 EXPORT_SYMBOL_GPL(netlink_remove_tap); 235 236 static __net_init int netlink_tap_init_net(struct net *net) 237 { 238 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 239 240 INIT_LIST_HEAD(&nn->netlink_tap_all); 241 mutex_init(&nn->netlink_tap_lock); 242 return 0; 243 } 244 245 static struct pernet_operations netlink_tap_net_ops = { 246 .init = netlink_tap_init_net, 247 .id = &netlink_tap_net_id, 248 .size = sizeof(struct netlink_tap_net), 249 }; 250 251 static bool netlink_filter_tap(const struct sk_buff *skb) 252 { 253 struct sock *sk = skb->sk; 254 255 /* We take the more conservative approach and 256 * whitelist socket protocols that may pass. 257 */ 258 switch (sk->sk_protocol) { 259 case NETLINK_ROUTE: 260 case NETLINK_USERSOCK: 261 case NETLINK_SOCK_DIAG: 262 case NETLINK_NFLOG: 263 case NETLINK_XFRM: 264 case NETLINK_FIB_LOOKUP: 265 case NETLINK_NETFILTER: 266 case NETLINK_GENERIC: 267 return true; 268 } 269 270 return false; 271 } 272 273 static int __netlink_deliver_tap_skb(struct sk_buff *skb, 274 struct net_device *dev) 275 { 276 struct sk_buff *nskb; 277 struct sock *sk = skb->sk; 278 int ret = -ENOMEM; 279 280 if (!net_eq(dev_net(dev), sock_net(sk))) 281 return 0; 282 283 dev_hold(dev); 284 285 if (is_vmalloc_addr(skb->head)) 286 nskb = netlink_to_full_skb(skb, GFP_ATOMIC); 287 else 288 nskb = skb_clone(skb, GFP_ATOMIC); 289 if (nskb) { 290 nskb->dev = dev; 291 nskb->protocol = htons((u16) sk->sk_protocol); 292 nskb->pkt_type = netlink_is_kernel(sk) ? 293 PACKET_KERNEL : PACKET_USER; 294 skb_reset_network_header(nskb); 295 ret = dev_queue_xmit(nskb); 296 if (unlikely(ret > 0)) 297 ret = net_xmit_errno(ret); 298 } 299 300 dev_put(dev); 301 return ret; 302 } 303 304 static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn) 305 { 306 int ret; 307 struct netlink_tap *tmp; 308 309 if (!netlink_filter_tap(skb)) 310 return; 311 312 list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) { 313 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 314 if (unlikely(ret)) 315 break; 316 } 317 } 318 319 static void netlink_deliver_tap(struct net *net, struct sk_buff *skb) 320 { 321 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 322 323 rcu_read_lock(); 324 325 if (unlikely(!list_empty(&nn->netlink_tap_all))) 326 __netlink_deliver_tap(skb, nn); 327 328 rcu_read_unlock(); 329 } 330 331 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, 332 struct sk_buff *skb) 333 { 334 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) 335 netlink_deliver_tap(sock_net(dst), skb); 336 } 337 338 static void netlink_overrun(struct sock *sk) 339 { 340 struct netlink_sock *nlk = nlk_sk(sk); 341 342 if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { 343 if (!test_and_set_bit(NETLINK_S_CONGESTED, 344 &nlk_sk(sk)->state)) { 345 sk->sk_err = ENOBUFS; 346 sk->sk_error_report(sk); 347 } 348 } 349 atomic_inc(&sk->sk_drops); 350 } 351 352 static void netlink_rcv_wake(struct sock *sk) 353 { 354 struct netlink_sock *nlk = nlk_sk(sk); 355 356 if (skb_queue_empty_lockless(&sk->sk_receive_queue)) 357 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 358 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) 359 wake_up_interruptible(&nlk->wait); 360 } 361 362 static void netlink_skb_destructor(struct sk_buff *skb) 363 { 364 if (is_vmalloc_addr(skb->head)) { 365 if 
(!skb->cloned || 366 !atomic_dec_return(&(skb_shinfo(skb)->dataref))) 367 vfree(skb->head); 368 369 skb->head = NULL; 370 } 371 if (skb->sk != NULL) 372 sock_rfree(skb); 373 } 374 375 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 376 { 377 WARN_ON(skb->sk != NULL); 378 skb->sk = sk; 379 skb->destructor = netlink_skb_destructor; 380 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 381 sk_mem_charge(sk, skb->truesize); 382 } 383 384 static void netlink_sock_destruct(struct sock *sk) 385 { 386 struct netlink_sock *nlk = nlk_sk(sk); 387 388 if (nlk->cb_running) { 389 if (nlk->cb.done) 390 nlk->cb.done(&nlk->cb); 391 module_put(nlk->cb.module); 392 kfree_skb(nlk->cb.skb); 393 } 394 395 skb_queue_purge(&sk->sk_receive_queue); 396 397 if (!sock_flag(sk, SOCK_DEAD)) { 398 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 399 return; 400 } 401 402 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 403 WARN_ON(refcount_read(&sk->sk_wmem_alloc)); 404 WARN_ON(nlk_sk(sk)->groups); 405 } 406 407 static void netlink_sock_destruct_work(struct work_struct *work) 408 { 409 struct netlink_sock *nlk = container_of(work, struct netlink_sock, 410 work); 411 412 sk_free(&nlk->sk); 413 } 414 415 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 416 * SMP. Look, when several writers sleep and reader wakes them up, all but one 417 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 418 * this, _but_ remember, it adds useless work on UP machines. 419 */ 420 421 void netlink_table_grab(void) 422 __acquires(nl_table_lock) 423 { 424 might_sleep(); 425 426 write_lock_irq(&nl_table_lock); 427 428 if (atomic_read(&nl_table_users)) { 429 DECLARE_WAITQUEUE(wait, current); 430 431 add_wait_queue_exclusive(&nl_table_wait, &wait); 432 for (;;) { 433 set_current_state(TASK_UNINTERRUPTIBLE); 434 if (atomic_read(&nl_table_users) == 0) 435 break; 436 write_unlock_irq(&nl_table_lock); 437 schedule(); 438 write_lock_irq(&nl_table_lock); 439 } 440 441 __set_current_state(TASK_RUNNING); 442 remove_wait_queue(&nl_table_wait, &wait); 443 } 444 } 445 446 void netlink_table_ungrab(void) 447 __releases(nl_table_lock) 448 { 449 write_unlock_irq(&nl_table_lock); 450 wake_up(&nl_table_wait); 451 } 452 453 static inline void 454 netlink_lock_table(void) 455 { 456 /* read_lock() synchronizes us to netlink_table_grab */ 457 458 read_lock(&nl_table_lock); 459 atomic_inc(&nl_table_users); 460 read_unlock(&nl_table_lock); 461 } 462 463 static inline void 464 netlink_unlock_table(void) 465 { 466 if (atomic_dec_and_test(&nl_table_users)) 467 wake_up(&nl_table_wait); 468 } 469 470 struct netlink_compare_arg 471 { 472 possible_net_t pnet; 473 u32 portid; 474 }; 475 476 /* Doing sizeof directly may yield 4 extra bytes on 64-bit. 
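 *
 * Illustrative note (assuming a 64-bit build with CONFIG_NET_NS): possible_net_t
 * is then a single struct net pointer, so portid lands at offset 8 and
 * offsetof(struct netlink_compare_arg, portid) + sizeof(u32) == 12, whereas
 * sizeof(struct netlink_compare_arg) would be padded up to 16. Using the
 * 12-byte length keeps the trailing padding out of the hash key.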
*/ 477 #define netlink_compare_arg_len \ 478 (offsetof(struct netlink_compare_arg, portid) + sizeof(u32)) 479 480 static inline int netlink_compare(struct rhashtable_compare_arg *arg, 481 const void *ptr) 482 { 483 const struct netlink_compare_arg *x = arg->key; 484 const struct netlink_sock *nlk = ptr; 485 486 return nlk->portid != x->portid || 487 !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); 488 } 489 490 static void netlink_compare_arg_init(struct netlink_compare_arg *arg, 491 struct net *net, u32 portid) 492 { 493 memset(arg, 0, sizeof(*arg)); 494 write_pnet(&arg->pnet, net); 495 arg->portid = portid; 496 } 497 498 static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, 499 struct net *net) 500 { 501 struct netlink_compare_arg arg; 502 503 netlink_compare_arg_init(&arg, net, portid); 504 return rhashtable_lookup_fast(&table->hash, &arg, 505 netlink_rhashtable_params); 506 } 507 508 static int __netlink_insert(struct netlink_table *table, struct sock *sk) 509 { 510 struct netlink_compare_arg arg; 511 512 netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); 513 return rhashtable_lookup_insert_key(&table->hash, &arg, 514 &nlk_sk(sk)->node, 515 netlink_rhashtable_params); 516 } 517 518 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 519 { 520 struct netlink_table *table = &nl_table[protocol]; 521 struct sock *sk; 522 523 rcu_read_lock(); 524 sk = __netlink_lookup(table, portid, net); 525 if (sk) 526 sock_hold(sk); 527 rcu_read_unlock(); 528 529 return sk; 530 } 531 532 static const struct proto_ops netlink_ops; 533 534 static void 535 netlink_update_listeners(struct sock *sk) 536 { 537 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 538 unsigned long mask; 539 unsigned int i; 540 struct listeners *listeners; 541 542 listeners = nl_deref_protected(tbl->listeners); 543 if (!listeners) 544 return; 545 546 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 547 mask = 0; 548 sk_for_each_bound(sk, &tbl->mc_list) { 549 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 550 mask |= nlk_sk(sk)->groups[i]; 551 } 552 listeners->masks[i] = mask; 553 } 554 /* this function is only called with the netlink table "grabbed", which 555 * makes sure updates are visible before bind or setsockopt return. */ 556 } 557 558 static int netlink_insert(struct sock *sk, u32 portid) 559 { 560 struct netlink_table *table = &nl_table[sk->sk_protocol]; 561 int err; 562 563 lock_sock(sk); 564 565 err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY; 566 if (nlk_sk(sk)->bound) 567 goto err; 568 569 nlk_sk(sk)->portid = portid; 570 sock_hold(sk); 571 572 err = __netlink_insert(table, sk); 573 if (err) { 574 /* In case the hashtable backend returns with -EBUSY 575 * from here, it must not escape to the caller. 576 */ 577 if (unlikely(err == -EBUSY)) 578 err = -EOVERFLOW; 579 if (err == -EEXIST) 580 err = -EADDRINUSE; 581 sock_put(sk); 582 goto err; 583 } 584 585 /* We need to ensure that the socket is hashed and visible. 
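 * The smp_wmb() below orders the hash insert and the portid store before the
 * store to nlk->bound; it is intended to pair with the smp_rmb() issued after
 * reading nlk->bound in netlink_bind() and netlink_sendmsg() (editor's note,
 * inferred from those call sites).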
*/ 586 smp_wmb(); 587 nlk_sk(sk)->bound = portid; 588 589 err: 590 release_sock(sk); 591 return err; 592 } 593 594 static void netlink_remove(struct sock *sk) 595 { 596 struct netlink_table *table; 597 598 table = &nl_table[sk->sk_protocol]; 599 if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, 600 netlink_rhashtable_params)) { 601 WARN_ON(refcount_read(&sk->sk_refcnt) == 1); 602 __sock_put(sk); 603 } 604 605 netlink_table_grab(); 606 if (nlk_sk(sk)->subscriptions) { 607 __sk_del_bind_node(sk); 608 netlink_update_listeners(sk); 609 } 610 if (sk->sk_protocol == NETLINK_GENERIC) 611 atomic_inc(&genl_sk_destructing_cnt); 612 netlink_table_ungrab(); 613 } 614 615 static struct proto netlink_proto = { 616 .name = "NETLINK", 617 .owner = THIS_MODULE, 618 .obj_size = sizeof(struct netlink_sock), 619 }; 620 621 static int __netlink_create(struct net *net, struct socket *sock, 622 struct mutex *cb_mutex, int protocol, 623 int kern) 624 { 625 struct sock *sk; 626 struct netlink_sock *nlk; 627 628 sock->ops = &netlink_ops; 629 630 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); 631 if (!sk) 632 return -ENOMEM; 633 634 sock_init_data(sock, sk); 635 636 nlk = nlk_sk(sk); 637 if (cb_mutex) { 638 nlk->cb_mutex = cb_mutex; 639 } else { 640 nlk->cb_mutex = &nlk->cb_def_mutex; 641 mutex_init(nlk->cb_mutex); 642 lockdep_set_class_and_name(nlk->cb_mutex, 643 nlk_cb_mutex_keys + protocol, 644 nlk_cb_mutex_key_strings[protocol]); 645 } 646 init_waitqueue_head(&nlk->wait); 647 648 sk->sk_destruct = netlink_sock_destruct; 649 sk->sk_protocol = protocol; 650 return 0; 651 } 652 653 static int netlink_create(struct net *net, struct socket *sock, int protocol, 654 int kern) 655 { 656 struct module *module = NULL; 657 struct mutex *cb_mutex; 658 struct netlink_sock *nlk; 659 int (*bind)(struct net *net, int group); 660 void (*unbind)(struct net *net, int group); 661 int err = 0; 662 663 sock->state = SS_UNCONNECTED; 664 665 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 666 return -ESOCKTNOSUPPORT; 667 668 if (protocol < 0 || protocol >= MAX_LINKS) 669 return -EPROTONOSUPPORT; 670 protocol = array_index_nospec(protocol, MAX_LINKS); 671 672 netlink_lock_table(); 673 #ifdef CONFIG_MODULES 674 if (!nl_table[protocol].registered) { 675 netlink_unlock_table(); 676 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 677 netlink_lock_table(); 678 } 679 #endif 680 if (nl_table[protocol].registered && 681 try_module_get(nl_table[protocol].module)) 682 module = nl_table[protocol].module; 683 else 684 err = -EPROTONOSUPPORT; 685 cb_mutex = nl_table[protocol].cb_mutex; 686 bind = nl_table[protocol].bind; 687 unbind = nl_table[protocol].unbind; 688 netlink_unlock_table(); 689 690 if (err < 0) 691 goto out; 692 693 err = __netlink_create(net, sock, cb_mutex, protocol, kern); 694 if (err < 0) 695 goto out_module; 696 697 local_bh_disable(); 698 sock_prot_inuse_add(net, &netlink_proto, 1); 699 local_bh_enable(); 700 701 nlk = nlk_sk(sock->sk); 702 nlk->module = module; 703 nlk->netlink_bind = bind; 704 nlk->netlink_unbind = unbind; 705 out: 706 return err; 707 708 out_module: 709 module_put(module); 710 goto out; 711 } 712 713 static void deferred_put_nlk_sk(struct rcu_head *head) 714 { 715 struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); 716 struct sock *sk = &nlk->sk; 717 718 kfree(nlk->groups); 719 nlk->groups = NULL; 720 721 if (!refcount_dec_and_test(&sk->sk_refcnt)) 722 return; 723 724 if (nlk->cb_running && nlk->cb.done) { 725 INIT_WORK(&nlk->work, 
netlink_sock_destruct_work); 726 schedule_work(&nlk->work); 727 return; 728 } 729 730 sk_free(sk); 731 } 732 733 static int netlink_release(struct socket *sock) 734 { 735 struct sock *sk = sock->sk; 736 struct netlink_sock *nlk; 737 738 if (!sk) 739 return 0; 740 741 netlink_remove(sk); 742 sock_orphan(sk); 743 nlk = nlk_sk(sk); 744 745 /* 746 * OK. Socket is unlinked, any packets that arrive now 747 * will be purged. 748 */ 749 750 /* must not acquire netlink_table_lock in any way again before unbind 751 * and notifying genetlink is done as otherwise it might deadlock 752 */ 753 if (nlk->netlink_unbind) { 754 int i; 755 756 for (i = 0; i < nlk->ngroups; i++) 757 if (test_bit(i, nlk->groups)) 758 nlk->netlink_unbind(sock_net(sk), i + 1); 759 } 760 if (sk->sk_protocol == NETLINK_GENERIC && 761 atomic_dec_return(&genl_sk_destructing_cnt) == 0) 762 wake_up(&genl_sk_destructing_waitq); 763 764 sock->sk = NULL; 765 wake_up_interruptible_all(&nlk->wait); 766 767 skb_queue_purge(&sk->sk_write_queue); 768 769 if (nlk->portid && nlk->bound) { 770 struct netlink_notify n = { 771 .net = sock_net(sk), 772 .protocol = sk->sk_protocol, 773 .portid = nlk->portid, 774 }; 775 blocking_notifier_call_chain(&netlink_chain, 776 NETLINK_URELEASE, &n); 777 } 778 779 module_put(nlk->module); 780 781 if (netlink_is_kernel(sk)) { 782 netlink_table_grab(); 783 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 784 if (--nl_table[sk->sk_protocol].registered == 0) { 785 struct listeners *old; 786 787 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); 788 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); 789 kfree_rcu(old, rcu); 790 nl_table[sk->sk_protocol].module = NULL; 791 nl_table[sk->sk_protocol].bind = NULL; 792 nl_table[sk->sk_protocol].unbind = NULL; 793 nl_table[sk->sk_protocol].flags = 0; 794 nl_table[sk->sk_protocol].registered = 0; 795 } 796 netlink_table_ungrab(); 797 } 798 799 local_bh_disable(); 800 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 801 local_bh_enable(); 802 call_rcu(&nlk->rcu, deferred_put_nlk_sk); 803 return 0; 804 } 805 806 static int netlink_autobind(struct socket *sock) 807 { 808 struct sock *sk = sock->sk; 809 struct net *net = sock_net(sk); 810 struct netlink_table *table = &nl_table[sk->sk_protocol]; 811 s32 portid = task_tgid_vnr(current); 812 int err; 813 s32 rover = -4096; 814 bool ok; 815 816 retry: 817 cond_resched(); 818 rcu_read_lock(); 819 ok = !__netlink_lookup(table, portid, net); 820 rcu_read_unlock(); 821 if (!ok) { 822 /* Bind collision, search negative portid values. */ 823 if (rover == -4096) 824 /* rover will be in range [S32_MIN, -4097] */ 825 rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN); 826 else if (rover >= -4096) 827 rover = -4097; 828 portid = rover--; 829 goto retry; 830 } 831 832 err = netlink_insert(sk, portid); 833 if (err == -EADDRINUSE) 834 goto retry; 835 836 /* If 2 threads race to autobind, that is fine. */ 837 if (err == -EBUSY) 838 err = 0; 839 840 return err; 841 } 842 843 /** 844 * __netlink_ns_capable - General netlink message capability test 845 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. 846 * @user_ns: The user namespace of the capability to use 847 * @cap: The capability to use 848 * 849 * Test to see if the opener of the socket we received the message 850 * from had when the netlink socket was created and the sender of the 851 * message has the capability @cap in the user namespace @user_ns. 
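 *
 * (Editor's note: concretely, the check below passes only if either the
 * message was explicitly addressed (NETLINK_SKB_DST) or the file that opened
 * the socket carried @cap in @user_ns, and in addition the current sender is
 * ns_capable() for @cap.)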
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
			  struct user_namespace *user_ns, int cap)
{
	return ((nsp->flags & NETLINK_SKB_DST) ||
		file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);

/**
 * netlink_ns_capable - General netlink message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap in the user
 * namespace @user_ns.
 */
bool netlink_ns_capable(const struct sk_buff *skb,
			struct user_namespace *user_ns, int cap)
{
	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
}
EXPORT_SYMBOL(netlink_ns_capable);

/**
 * netlink_capable - Netlink global message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap in all user
 * namespaces.
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap over the
 * network namespace of the socket we received the message from.
903 */ 904 bool netlink_net_capable(const struct sk_buff *skb, int cap) 905 { 906 return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); 907 } 908 EXPORT_SYMBOL(netlink_net_capable); 909 910 static inline int netlink_allowed(const struct socket *sock, unsigned int flag) 911 { 912 return (nl_table[sock->sk->sk_protocol].flags & flag) || 913 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); 914 } 915 916 static void 917 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 918 { 919 struct netlink_sock *nlk = nlk_sk(sk); 920 921 if (nlk->subscriptions && !subscriptions) 922 __sk_del_bind_node(sk); 923 else if (!nlk->subscriptions && subscriptions) 924 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 925 nlk->subscriptions = subscriptions; 926 } 927 928 static int netlink_realloc_groups(struct sock *sk) 929 { 930 struct netlink_sock *nlk = nlk_sk(sk); 931 unsigned int groups; 932 unsigned long *new_groups; 933 int err = 0; 934 935 netlink_table_grab(); 936 937 groups = nl_table[sk->sk_protocol].groups; 938 if (!nl_table[sk->sk_protocol].registered) { 939 err = -ENOENT; 940 goto out_unlock; 941 } 942 943 if (nlk->ngroups >= groups) 944 goto out_unlock; 945 946 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 947 if (new_groups == NULL) { 948 err = -ENOMEM; 949 goto out_unlock; 950 } 951 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 952 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 953 954 nlk->groups = new_groups; 955 nlk->ngroups = groups; 956 out_unlock: 957 netlink_table_ungrab(); 958 return err; 959 } 960 961 static void netlink_undo_bind(int group, long unsigned int groups, 962 struct sock *sk) 963 { 964 struct netlink_sock *nlk = nlk_sk(sk); 965 int undo; 966 967 if (!nlk->netlink_unbind) 968 return; 969 970 for (undo = 0; undo < group; undo++) 971 if (test_bit(undo, &groups)) 972 nlk->netlink_unbind(sock_net(sk), undo + 1); 973 } 974 975 static int netlink_bind(struct socket *sock, struct sockaddr *addr, 976 int addr_len) 977 { 978 struct sock *sk = sock->sk; 979 struct net *net = sock_net(sk); 980 struct netlink_sock *nlk = nlk_sk(sk); 981 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 982 int err = 0; 983 unsigned long groups; 984 bool bound; 985 986 if (addr_len < sizeof(struct sockaddr_nl)) 987 return -EINVAL; 988 989 if (nladdr->nl_family != AF_NETLINK) 990 return -EINVAL; 991 groups = nladdr->nl_groups; 992 993 /* Only superuser is allowed to listen multicasts */ 994 if (groups) { 995 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 996 return -EPERM; 997 err = netlink_realloc_groups(sk); 998 if (err) 999 return err; 1000 } 1001 1002 if (nlk->ngroups < BITS_PER_LONG) 1003 groups &= (1UL << nlk->ngroups) - 1; 1004 1005 bound = nlk->bound; 1006 if (bound) { 1007 /* Ensure nlk->portid is up-to-date. */ 1008 smp_rmb(); 1009 1010 if (nladdr->nl_pid != nlk->portid) 1011 return -EINVAL; 1012 } 1013 1014 netlink_lock_table(); 1015 if (nlk->netlink_bind && groups) { 1016 int group; 1017 1018 /* nl_groups is a u32, so cap the maximum groups we can bind */ 1019 for (group = 0; group < BITS_PER_TYPE(u32); group++) { 1020 if (!test_bit(group, &groups)) 1021 continue; 1022 err = nlk->netlink_bind(net, group + 1); 1023 if (!err) 1024 continue; 1025 netlink_undo_bind(group, groups, sk); 1026 goto unlock; 1027 } 1028 } 1029 1030 /* No need for barriers here as we return to user-space without 1031 * using any of the bound attributes. 1032 */ 1033 if (!bound) { 1034 err = nladdr->nl_pid ? 
1035 netlink_insert(sk, nladdr->nl_pid) : 1036 netlink_autobind(sock); 1037 if (err) { 1038 netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); 1039 goto unlock; 1040 } 1041 } 1042 1043 if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) 1044 goto unlock; 1045 netlink_unlock_table(); 1046 1047 netlink_table_grab(); 1048 netlink_update_subscriptions(sk, nlk->subscriptions + 1049 hweight32(groups) - 1050 hweight32(nlk->groups[0])); 1051 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; 1052 netlink_update_listeners(sk); 1053 netlink_table_ungrab(); 1054 1055 return 0; 1056 1057 unlock: 1058 netlink_unlock_table(); 1059 return err; 1060 } 1061 1062 static int netlink_connect(struct socket *sock, struct sockaddr *addr, 1063 int alen, int flags) 1064 { 1065 int err = 0; 1066 struct sock *sk = sock->sk; 1067 struct netlink_sock *nlk = nlk_sk(sk); 1068 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 1069 1070 if (alen < sizeof(addr->sa_family)) 1071 return -EINVAL; 1072 1073 if (addr->sa_family == AF_UNSPEC) { 1074 sk->sk_state = NETLINK_UNCONNECTED; 1075 nlk->dst_portid = 0; 1076 nlk->dst_group = 0; 1077 return 0; 1078 } 1079 if (addr->sa_family != AF_NETLINK) 1080 return -EINVAL; 1081 1082 if (alen < sizeof(struct sockaddr_nl)) 1083 return -EINVAL; 1084 1085 if ((nladdr->nl_groups || nladdr->nl_pid) && 1086 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1087 return -EPERM; 1088 1089 /* No need for barriers here as we return to user-space without 1090 * using any of the bound attributes. 1091 */ 1092 if (!nlk->bound) 1093 err = netlink_autobind(sock); 1094 1095 if (err == 0) { 1096 sk->sk_state = NETLINK_CONNECTED; 1097 nlk->dst_portid = nladdr->nl_pid; 1098 nlk->dst_group = ffs(nladdr->nl_groups); 1099 } 1100 1101 return err; 1102 } 1103 1104 static int netlink_getname(struct socket *sock, struct sockaddr *addr, 1105 int peer) 1106 { 1107 struct sock *sk = sock->sk; 1108 struct netlink_sock *nlk = nlk_sk(sk); 1109 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); 1110 1111 nladdr->nl_family = AF_NETLINK; 1112 nladdr->nl_pad = 0; 1113 1114 if (peer) { 1115 nladdr->nl_pid = nlk->dst_portid; 1116 nladdr->nl_groups = netlink_group_mask(nlk->dst_group); 1117 } else { 1118 nladdr->nl_pid = nlk->portid; 1119 netlink_lock_table(); 1120 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; 1121 netlink_unlock_table(); 1122 } 1123 return sizeof(*nladdr); 1124 } 1125 1126 static int netlink_ioctl(struct socket *sock, unsigned int cmd, 1127 unsigned long arg) 1128 { 1129 /* try to hand this ioctl down to the NIC drivers. 
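 * Returning -ENOIOCTLCMD lets the generic socket ioctl code fall back to the
 * device ioctl path for interface requests (editor's note, based on the
 * common socket-layer fallback behaviour).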
 */
	return -ENOIOCTLCMD;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_portid != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
					       int broadcast)
{
	struct sk_buff *skb;
	void *data;

	if (size <= NLMSG_GOODSIZE || broadcast)
		return alloc_skb(size, GFP_KERNEL);

	size = SKB_DATA_ALIGN(size) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	data = vmalloc(size);
	if (data == NULL)
		return NULL;

	skb = __build_skb(data, size);
	if (skb == NULL)
		vfree(data);
	else
		skb->destructor = netlink_skb_destructor;

	return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
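 *    (Editor's note: netlink_unicast() below is the main caller and jumps
 *    back to its socket lookup when 1 is returned.)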
1203 */ 1204 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 1205 long *timeo, struct sock *ssk) 1206 { 1207 struct netlink_sock *nlk; 1208 1209 nlk = nlk_sk(sk); 1210 1211 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1212 test_bit(NETLINK_S_CONGESTED, &nlk->state))) { 1213 DECLARE_WAITQUEUE(wait, current); 1214 if (!*timeo) { 1215 if (!ssk || netlink_is_kernel(ssk)) 1216 netlink_overrun(sk); 1217 sock_put(sk); 1218 kfree_skb(skb); 1219 return -EAGAIN; 1220 } 1221 1222 __set_current_state(TASK_INTERRUPTIBLE); 1223 add_wait_queue(&nlk->wait, &wait); 1224 1225 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1226 test_bit(NETLINK_S_CONGESTED, &nlk->state)) && 1227 !sock_flag(sk, SOCK_DEAD)) 1228 *timeo = schedule_timeout(*timeo); 1229 1230 __set_current_state(TASK_RUNNING); 1231 remove_wait_queue(&nlk->wait, &wait); 1232 sock_put(sk); 1233 1234 if (signal_pending(current)) { 1235 kfree_skb(skb); 1236 return sock_intr_errno(*timeo); 1237 } 1238 return 1; 1239 } 1240 netlink_skb_set_owner_r(skb, sk); 1241 return 0; 1242 } 1243 1244 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1245 { 1246 int len = skb->len; 1247 1248 netlink_deliver_tap(sock_net(sk), skb); 1249 1250 skb_queue_tail(&sk->sk_receive_queue, skb); 1251 sk->sk_data_ready(sk); 1252 return len; 1253 } 1254 1255 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1256 { 1257 int len = __netlink_sendskb(sk, skb); 1258 1259 sock_put(sk); 1260 return len; 1261 } 1262 1263 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 1264 { 1265 kfree_skb(skb); 1266 sock_put(sk); 1267 } 1268 1269 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) 1270 { 1271 int delta; 1272 1273 WARN_ON(skb->sk != NULL); 1274 delta = skb->end - skb->tail; 1275 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) 1276 return skb; 1277 1278 if (skb_shared(skb)) { 1279 struct sk_buff *nskb = skb_clone(skb, allocation); 1280 if (!nskb) 1281 return skb; 1282 consume_skb(skb); 1283 skb = nskb; 1284 } 1285 1286 pskb_expand_head(skb, 0, -delta, 1287 (allocation & ~__GFP_DIRECT_RECLAIM) | 1288 __GFP_NOWARN | __GFP_NORETRY); 1289 return skb; 1290 } 1291 1292 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 1293 struct sock *ssk) 1294 { 1295 int ret; 1296 struct netlink_sock *nlk = nlk_sk(sk); 1297 1298 ret = -ECONNREFUSED; 1299 if (nlk->netlink_rcv != NULL) { 1300 ret = skb->len; 1301 netlink_skb_set_owner_r(skb, sk); 1302 NETLINK_CB(skb).sk = ssk; 1303 netlink_deliver_tap_kernel(sk, ssk, skb); 1304 nlk->netlink_rcv(skb); 1305 consume_skb(skb); 1306 } else { 1307 kfree_skb(skb); 1308 } 1309 sock_put(sk); 1310 return ret; 1311 } 1312 1313 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 1314 u32 portid, int nonblock) 1315 { 1316 struct sock *sk; 1317 int err; 1318 long timeo; 1319 1320 skb = netlink_trim(skb, gfp_any()); 1321 1322 timeo = sock_sndtimeo(ssk, nonblock); 1323 retry: 1324 sk = netlink_getsockbyportid(ssk, portid); 1325 if (IS_ERR(sk)) { 1326 kfree_skb(skb); 1327 return PTR_ERR(sk); 1328 } 1329 if (netlink_is_kernel(sk)) 1330 return netlink_unicast_kernel(sk, skb, ssk); 1331 1332 if (sk_filter(sk, skb)) { 1333 err = skb->len; 1334 kfree_skb(skb); 1335 sock_put(sk); 1336 return err; 1337 } 1338 1339 err = netlink_attachskb(sk, skb, &timeo, ssk); 1340 if (err == 1) 1341 goto retry; 1342 if (err) 1343 return err; 1344 1345 return netlink_sendskb(sk, skb); 1346 } 1347 EXPORT_SYMBOL(netlink_unicast); 1348 1349 int 
netlink_has_listeners(struct sock *sk, unsigned int group) 1350 { 1351 int res = 0; 1352 struct listeners *listeners; 1353 1354 BUG_ON(!netlink_is_kernel(sk)); 1355 1356 rcu_read_lock(); 1357 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 1358 1359 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) 1360 res = test_bit(group - 1, listeners->masks); 1361 1362 rcu_read_unlock(); 1363 1364 return res; 1365 } 1366 EXPORT_SYMBOL_GPL(netlink_has_listeners); 1367 1368 bool netlink_strict_get_check(struct sk_buff *skb) 1369 { 1370 const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk); 1371 1372 return nlk->flags & NETLINK_F_STRICT_CHK; 1373 } 1374 EXPORT_SYMBOL_GPL(netlink_strict_get_check); 1375 1376 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1377 { 1378 struct netlink_sock *nlk = nlk_sk(sk); 1379 1380 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1381 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { 1382 netlink_skb_set_owner_r(skb, sk); 1383 __netlink_sendskb(sk, skb); 1384 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1385 } 1386 return -1; 1387 } 1388 1389 struct netlink_broadcast_data { 1390 struct sock *exclude_sk; 1391 struct net *net; 1392 u32 portid; 1393 u32 group; 1394 int failure; 1395 int delivery_failure; 1396 int congested; 1397 int delivered; 1398 gfp_t allocation; 1399 struct sk_buff *skb, *skb2; 1400 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); 1401 void *tx_data; 1402 }; 1403 1404 static void do_one_broadcast(struct sock *sk, 1405 struct netlink_broadcast_data *p) 1406 { 1407 struct netlink_sock *nlk = nlk_sk(sk); 1408 int val; 1409 1410 if (p->exclude_sk == sk) 1411 return; 1412 1413 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1414 !test_bit(p->group - 1, nlk->groups)) 1415 return; 1416 1417 if (!net_eq(sock_net(sk), p->net)) { 1418 if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) 1419 return; 1420 1421 if (!peernet_has_id(sock_net(sk), p->net)) 1422 return; 1423 1424 if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, 1425 CAP_NET_BROADCAST)) 1426 return; 1427 } 1428 1429 if (p->failure) { 1430 netlink_overrun(sk); 1431 return; 1432 } 1433 1434 sock_hold(sk); 1435 if (p->skb2 == NULL) { 1436 if (skb_shared(p->skb)) { 1437 p->skb2 = skb_clone(p->skb, p->allocation); 1438 } else { 1439 p->skb2 = skb_get(p->skb); 1440 /* 1441 * skb ownership may have been set when 1442 * delivered to a previous socket. 1443 */ 1444 skb_orphan(p->skb2); 1445 } 1446 } 1447 if (p->skb2 == NULL) { 1448 netlink_overrun(sk); 1449 /* Clone failed. Notify ALL listeners. 
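 * (Editor's note: once p->failure is set, the remaining sockets in this
 * walk only get netlink_overrun(); see the check near the top of this
 * function.)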
*/ 1450 p->failure = 1; 1451 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) 1452 p->delivery_failure = 1; 1453 goto out; 1454 } 1455 if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { 1456 kfree_skb(p->skb2); 1457 p->skb2 = NULL; 1458 goto out; 1459 } 1460 if (sk_filter(sk, p->skb2)) { 1461 kfree_skb(p->skb2); 1462 p->skb2 = NULL; 1463 goto out; 1464 } 1465 NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); 1466 if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) 1467 NETLINK_CB(p->skb2).nsid_is_set = true; 1468 val = netlink_broadcast_deliver(sk, p->skb2); 1469 if (val < 0) { 1470 netlink_overrun(sk); 1471 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) 1472 p->delivery_failure = 1; 1473 } else { 1474 p->congested |= val; 1475 p->delivered = 1; 1476 p->skb2 = NULL; 1477 } 1478 out: 1479 sock_put(sk); 1480 } 1481 1482 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, 1483 u32 group, gfp_t allocation, 1484 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), 1485 void *filter_data) 1486 { 1487 struct net *net = sock_net(ssk); 1488 struct netlink_broadcast_data info; 1489 struct sock *sk; 1490 1491 skb = netlink_trim(skb, allocation); 1492 1493 info.exclude_sk = ssk; 1494 info.net = net; 1495 info.portid = portid; 1496 info.group = group; 1497 info.failure = 0; 1498 info.delivery_failure = 0; 1499 info.congested = 0; 1500 info.delivered = 0; 1501 info.allocation = allocation; 1502 info.skb = skb; 1503 info.skb2 = NULL; 1504 info.tx_filter = filter; 1505 info.tx_data = filter_data; 1506 1507 /* While we sleep in clone, do not allow to change socket list */ 1508 1509 netlink_lock_table(); 1510 1511 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1512 do_one_broadcast(sk, &info); 1513 1514 consume_skb(skb); 1515 1516 netlink_unlock_table(); 1517 1518 if (info.delivery_failure) { 1519 kfree_skb(info.skb2); 1520 return -ENOBUFS; 1521 } 1522 consume_skb(info.skb2); 1523 1524 if (info.delivered) { 1525 if (info.congested && gfpflags_allow_blocking(allocation)) 1526 yield(); 1527 return 0; 1528 } 1529 return -ESRCH; 1530 } 1531 EXPORT_SYMBOL(netlink_broadcast_filtered); 1532 1533 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, 1534 u32 group, gfp_t allocation) 1535 { 1536 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, 1537 NULL, NULL); 1538 } 1539 EXPORT_SYMBOL(netlink_broadcast); 1540 1541 struct netlink_set_err_data { 1542 struct sock *exclude_sk; 1543 u32 portid; 1544 u32 group; 1545 int code; 1546 }; 1547 1548 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) 1549 { 1550 struct netlink_sock *nlk = nlk_sk(sk); 1551 int ret = 0; 1552 1553 if (sk == p->exclude_sk) 1554 goto out; 1555 1556 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) 1557 goto out; 1558 1559 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1560 !test_bit(p->group - 1, nlk->groups)) 1561 goto out; 1562 1563 if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) { 1564 ret = 1; 1565 goto out; 1566 } 1567 1568 sk->sk_err = p->code; 1569 sk->sk_error_report(sk); 1570 out: 1571 return ret; 1572 } 1573 1574 /** 1575 * netlink_set_err - report error to broadcast listeners 1576 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() 1577 * @portid: the PORTID of a process that we want to skip (if any) 1578 * @group: the broadcast group that will notice the error 1579 * @code: error code, must be negative (as usual in kernelspace) 1580 
* 1581 * This function returns the number of broadcast listeners that have set the 1582 * NETLINK_NO_ENOBUFS socket option. 1583 */ 1584 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 1585 { 1586 struct netlink_set_err_data info; 1587 struct sock *sk; 1588 int ret = 0; 1589 1590 info.exclude_sk = ssk; 1591 info.portid = portid; 1592 info.group = group; 1593 /* sk->sk_err wants a positive error value */ 1594 info.code = -code; 1595 1596 read_lock(&nl_table_lock); 1597 1598 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1599 ret += do_one_set_err(sk, &info); 1600 1601 read_unlock(&nl_table_lock); 1602 return ret; 1603 } 1604 EXPORT_SYMBOL(netlink_set_err); 1605 1606 /* must be called with netlink table grabbed */ 1607 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1608 unsigned int group, 1609 int is_new) 1610 { 1611 int old, new = !!is_new, subscriptions; 1612 1613 old = test_bit(group - 1, nlk->groups); 1614 subscriptions = nlk->subscriptions - old + new; 1615 if (new) 1616 __set_bit(group - 1, nlk->groups); 1617 else 1618 __clear_bit(group - 1, nlk->groups); 1619 netlink_update_subscriptions(&nlk->sk, subscriptions); 1620 netlink_update_listeners(&nlk->sk); 1621 } 1622 1623 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1624 sockptr_t optval, unsigned int optlen) 1625 { 1626 struct sock *sk = sock->sk; 1627 struct netlink_sock *nlk = nlk_sk(sk); 1628 unsigned int val = 0; 1629 int err; 1630 1631 if (level != SOL_NETLINK) 1632 return -ENOPROTOOPT; 1633 1634 if (optlen >= sizeof(int) && 1635 copy_from_sockptr(&val, optval, sizeof(val))) 1636 return -EFAULT; 1637 1638 switch (optname) { 1639 case NETLINK_PKTINFO: 1640 if (val) 1641 nlk->flags |= NETLINK_F_RECV_PKTINFO; 1642 else 1643 nlk->flags &= ~NETLINK_F_RECV_PKTINFO; 1644 err = 0; 1645 break; 1646 case NETLINK_ADD_MEMBERSHIP: 1647 case NETLINK_DROP_MEMBERSHIP: { 1648 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 1649 return -EPERM; 1650 err = netlink_realloc_groups(sk); 1651 if (err) 1652 return err; 1653 if (!val || val - 1 >= nlk->ngroups) 1654 return -EINVAL; 1655 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { 1656 err = nlk->netlink_bind(sock_net(sk), val); 1657 if (err) 1658 return err; 1659 } 1660 netlink_table_grab(); 1661 netlink_update_socket_mc(nlk, val, 1662 optname == NETLINK_ADD_MEMBERSHIP); 1663 netlink_table_ungrab(); 1664 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) 1665 nlk->netlink_unbind(sock_net(sk), val); 1666 1667 err = 0; 1668 break; 1669 } 1670 case NETLINK_BROADCAST_ERROR: 1671 if (val) 1672 nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR; 1673 else 1674 nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR; 1675 err = 0; 1676 break; 1677 case NETLINK_NO_ENOBUFS: 1678 if (val) { 1679 nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS; 1680 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 1681 wake_up_interruptible(&nlk->wait); 1682 } else { 1683 nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS; 1684 } 1685 err = 0; 1686 break; 1687 case NETLINK_LISTEN_ALL_NSID: 1688 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) 1689 return -EPERM; 1690 1691 if (val) 1692 nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; 1693 else 1694 nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; 1695 err = 0; 1696 break; 1697 case NETLINK_CAP_ACK: 1698 if (val) 1699 nlk->flags |= NETLINK_F_CAP_ACK; 1700 else 1701 nlk->flags &= ~NETLINK_F_CAP_ACK; 1702 err = 0; 1703 break; 1704 case NETLINK_EXT_ACK: 1705 if (val) 1706 nlk->flags |= NETLINK_F_EXT_ACK; 1707 else 
1708 nlk->flags &= ~NETLINK_F_EXT_ACK; 1709 err = 0; 1710 break; 1711 case NETLINK_GET_STRICT_CHK: 1712 if (val) 1713 nlk->flags |= NETLINK_F_STRICT_CHK; 1714 else 1715 nlk->flags &= ~NETLINK_F_STRICT_CHK; 1716 err = 0; 1717 break; 1718 default: 1719 err = -ENOPROTOOPT; 1720 } 1721 return err; 1722 } 1723 1724 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1725 char __user *optval, int __user *optlen) 1726 { 1727 struct sock *sk = sock->sk; 1728 struct netlink_sock *nlk = nlk_sk(sk); 1729 int len, val, err; 1730 1731 if (level != SOL_NETLINK) 1732 return -ENOPROTOOPT; 1733 1734 if (get_user(len, optlen)) 1735 return -EFAULT; 1736 if (len < 0) 1737 return -EINVAL; 1738 1739 switch (optname) { 1740 case NETLINK_PKTINFO: 1741 if (len < sizeof(int)) 1742 return -EINVAL; 1743 len = sizeof(int); 1744 val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0; 1745 if (put_user(len, optlen) || 1746 put_user(val, optval)) 1747 return -EFAULT; 1748 err = 0; 1749 break; 1750 case NETLINK_BROADCAST_ERROR: 1751 if (len < sizeof(int)) 1752 return -EINVAL; 1753 len = sizeof(int); 1754 val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0; 1755 if (put_user(len, optlen) || 1756 put_user(val, optval)) 1757 return -EFAULT; 1758 err = 0; 1759 break; 1760 case NETLINK_NO_ENOBUFS: 1761 if (len < sizeof(int)) 1762 return -EINVAL; 1763 len = sizeof(int); 1764 val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0; 1765 if (put_user(len, optlen) || 1766 put_user(val, optval)) 1767 return -EFAULT; 1768 err = 0; 1769 break; 1770 case NETLINK_LIST_MEMBERSHIPS: { 1771 int pos, idx, shift; 1772 1773 err = 0; 1774 netlink_lock_table(); 1775 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 1776 if (len - pos < sizeof(u32)) 1777 break; 1778 1779 idx = pos / sizeof(unsigned long); 1780 shift = (pos % sizeof(unsigned long)) * 8; 1781 if (put_user((u32)(nlk->groups[idx] >> shift), 1782 (u32 __user *)(optval + pos))) { 1783 err = -EFAULT; 1784 break; 1785 } 1786 } 1787 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) 1788 err = -EFAULT; 1789 netlink_unlock_table(); 1790 break; 1791 } 1792 case NETLINK_CAP_ACK: 1793 if (len < sizeof(int)) 1794 return -EINVAL; 1795 len = sizeof(int); 1796 val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0; 1797 if (put_user(len, optlen) || 1798 put_user(val, optval)) 1799 return -EFAULT; 1800 err = 0; 1801 break; 1802 case NETLINK_EXT_ACK: 1803 if (len < sizeof(int)) 1804 return -EINVAL; 1805 len = sizeof(int); 1806 val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0; 1807 if (put_user(len, optlen) || put_user(val, optval)) 1808 return -EFAULT; 1809 err = 0; 1810 break; 1811 case NETLINK_GET_STRICT_CHK: 1812 if (len < sizeof(int)) 1813 return -EINVAL; 1814 len = sizeof(int); 1815 val = nlk->flags & NETLINK_F_STRICT_CHK ? 
1 : 0; 1816 if (put_user(len, optlen) || put_user(val, optval)) 1817 return -EFAULT; 1818 err = 0; 1819 break; 1820 default: 1821 err = -ENOPROTOOPT; 1822 } 1823 return err; 1824 } 1825 1826 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1827 { 1828 struct nl_pktinfo info; 1829 1830 info.group = NETLINK_CB(skb).dst_group; 1831 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1832 } 1833 1834 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, 1835 struct sk_buff *skb) 1836 { 1837 if (!NETLINK_CB(skb).nsid_is_set) 1838 return; 1839 1840 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), 1841 &NETLINK_CB(skb).nsid); 1842 } 1843 1844 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1845 { 1846 struct sock *sk = sock->sk; 1847 struct netlink_sock *nlk = nlk_sk(sk); 1848 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1849 u32 dst_portid; 1850 u32 dst_group; 1851 struct sk_buff *skb; 1852 int err; 1853 struct scm_cookie scm; 1854 u32 netlink_skb_flags = 0; 1855 1856 if (msg->msg_flags & MSG_OOB) 1857 return -EOPNOTSUPP; 1858 1859 err = scm_send(sock, msg, &scm, true); 1860 if (err < 0) 1861 return err; 1862 1863 if (msg->msg_namelen) { 1864 err = -EINVAL; 1865 if (msg->msg_namelen < sizeof(struct sockaddr_nl)) 1866 goto out; 1867 if (addr->nl_family != AF_NETLINK) 1868 goto out; 1869 dst_portid = addr->nl_pid; 1870 dst_group = ffs(addr->nl_groups); 1871 err = -EPERM; 1872 if ((dst_group || dst_portid) && 1873 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1874 goto out; 1875 netlink_skb_flags |= NETLINK_SKB_DST; 1876 } else { 1877 dst_portid = nlk->dst_portid; 1878 dst_group = nlk->dst_group; 1879 } 1880 1881 if (!nlk->bound) { 1882 err = netlink_autobind(sock); 1883 if (err) 1884 goto out; 1885 } else { 1886 /* Ensure nlk is hashed and visible. 
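 * This smp_rmb() is intended to pair with the smp_wmb() in netlink_insert():
 * once nlk->bound is seen non-zero, the nlk->portid read below observes the
 * bound value (editor's note).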
*/ 1887 smp_rmb(); 1888 } 1889 1890 err = -EMSGSIZE; 1891 if (len > sk->sk_sndbuf - 32) 1892 goto out; 1893 err = -ENOBUFS; 1894 skb = netlink_alloc_large_skb(len, dst_group); 1895 if (skb == NULL) 1896 goto out; 1897 1898 NETLINK_CB(skb).portid = nlk->portid; 1899 NETLINK_CB(skb).dst_group = dst_group; 1900 NETLINK_CB(skb).creds = scm.creds; 1901 NETLINK_CB(skb).flags = netlink_skb_flags; 1902 1903 err = -EFAULT; 1904 if (memcpy_from_msg(skb_put(skb, len), msg, len)) { 1905 kfree_skb(skb); 1906 goto out; 1907 } 1908 1909 err = security_netlink_send(sk, skb); 1910 if (err) { 1911 kfree_skb(skb); 1912 goto out; 1913 } 1914 1915 if (dst_group) { 1916 refcount_inc(&skb->users); 1917 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 1918 } 1919 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT); 1920 1921 out: 1922 scm_destroy(&scm); 1923 return err; 1924 } 1925 1926 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1927 int flags) 1928 { 1929 struct scm_cookie scm; 1930 struct sock *sk = sock->sk; 1931 struct netlink_sock *nlk = nlk_sk(sk); 1932 int noblock = flags & MSG_DONTWAIT; 1933 size_t copied; 1934 struct sk_buff *skb, *data_skb; 1935 int err, ret; 1936 1937 if (flags & MSG_OOB) 1938 return -EOPNOTSUPP; 1939 1940 copied = 0; 1941 1942 skb = skb_recv_datagram(sk, flags, noblock, &err); 1943 if (skb == NULL) 1944 goto out; 1945 1946 data_skb = skb; 1947 1948 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1949 if (unlikely(skb_shinfo(skb)->frag_list)) { 1950 /* 1951 * If this skb has a frag_list, then here that means that we 1952 * will have to use the frag_list skb's data for compat tasks 1953 * and the regular skb's data for normal (non-compat) tasks. 1954 * 1955 * If we need to send the compat skb, assign it to the 1956 * 'data_skb' variable so that it will be used below for data 1957 * copying. We keep 'skb' for everything else, including 1958 * freeing both later. 1959 */ 1960 if (flags & MSG_CMSG_COMPAT) 1961 data_skb = skb_shinfo(skb)->frag_list; 1962 } 1963 #endif 1964 1965 /* Record the max length of recvmsg() calls for future allocations */ 1966 nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); 1967 nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, 1968 SKB_WITH_OVERHEAD(32768)); 1969 1970 copied = data_skb->len; 1971 if (len < copied) { 1972 msg->msg_flags |= MSG_TRUNC; 1973 copied = len; 1974 } 1975 1976 skb_reset_transport_header(data_skb); 1977 err = skb_copy_datagram_msg(data_skb, 0, msg, copied); 1978 1979 if (msg->msg_name) { 1980 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1981 addr->nl_family = AF_NETLINK; 1982 addr->nl_pad = 0; 1983 addr->nl_pid = NETLINK_CB(skb).portid; 1984 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1985 msg->msg_namelen = sizeof(*addr); 1986 } 1987 1988 if (nlk->flags & NETLINK_F_RECV_PKTINFO) 1989 netlink_cmsg_recv_pktinfo(msg, skb); 1990 if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) 1991 netlink_cmsg_listen_all_nsid(sk, msg, skb); 1992 1993 memset(&scm, 0, sizeof(scm)); 1994 scm.creds = *NETLINK_CREDS(skb); 1995 if (flags & MSG_TRUNC) 1996 copied = data_skb->len; 1997 1998 skb_free_datagram(sk, skb); 1999 2000 if (nlk->cb_running && 2001 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 2002 ret = netlink_dump(sk); 2003 if (ret) { 2004 sk->sk_err = -ret; 2005 sk->sk_error_report(sk); 2006 } 2007 } 2008 2009 scm_recv(sock, msg, &scm, flags); 2010 out: 2011 netlink_rcv_wake(sk); 2012 return err ? 
: copied; 2013 } 2014 2015 static void netlink_data_ready(struct sock *sk) 2016 { 2017 BUG(); 2018 } 2019 2020 /* 2021 * We export these functions to other modules. They provide a 2022 * complete set of kernel non-blocking support for message 2023 * queueing. 2024 */ 2025 2026 struct sock * 2027 __netlink_kernel_create(struct net *net, int unit, struct module *module, 2028 struct netlink_kernel_cfg *cfg) 2029 { 2030 struct socket *sock; 2031 struct sock *sk; 2032 struct netlink_sock *nlk; 2033 struct listeners *listeners = NULL; 2034 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; 2035 unsigned int groups; 2036 2037 BUG_ON(!nl_table); 2038 2039 if (unit < 0 || unit >= MAX_LINKS) 2040 return NULL; 2041 2042 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 2043 return NULL; 2044 2045 if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) 2046 goto out_sock_release_nosk; 2047 2048 sk = sock->sk; 2049 2050 if (!cfg || cfg->groups < 32) 2051 groups = 32; 2052 else 2053 groups = cfg->groups; 2054 2055 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2056 if (!listeners) 2057 goto out_sock_release; 2058 2059 sk->sk_data_ready = netlink_data_ready; 2060 if (cfg && cfg->input) 2061 nlk_sk(sk)->netlink_rcv = cfg->input; 2062 2063 if (netlink_insert(sk, 0)) 2064 goto out_sock_release; 2065 2066 nlk = nlk_sk(sk); 2067 nlk->flags |= NETLINK_F_KERNEL_SOCKET; 2068 2069 netlink_table_grab(); 2070 if (!nl_table[unit].registered) { 2071 nl_table[unit].groups = groups; 2072 rcu_assign_pointer(nl_table[unit].listeners, listeners); 2073 nl_table[unit].cb_mutex = cb_mutex; 2074 nl_table[unit].module = module; 2075 if (cfg) { 2076 nl_table[unit].bind = cfg->bind; 2077 nl_table[unit].unbind = cfg->unbind; 2078 nl_table[unit].flags = cfg->flags; 2079 if (cfg->compare) 2080 nl_table[unit].compare = cfg->compare; 2081 } 2082 nl_table[unit].registered = 1; 2083 } else { 2084 kfree(listeners); 2085 nl_table[unit].registered++; 2086 } 2087 netlink_table_ungrab(); 2088 return sk; 2089 2090 out_sock_release: 2091 kfree(listeners); 2092 netlink_kernel_release(sk); 2093 return NULL; 2094 2095 out_sock_release_nosk: 2096 sock_release(sock); 2097 return NULL; 2098 } 2099 EXPORT_SYMBOL(__netlink_kernel_create); 2100 2101 void 2102 netlink_kernel_release(struct sock *sk) 2103 { 2104 if (sk == NULL || sk->sk_socket == NULL) 2105 return; 2106 2107 sock_release(sk->sk_socket); 2108 } 2109 EXPORT_SYMBOL(netlink_kernel_release); 2110 2111 int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 2112 { 2113 struct listeners *new, *old; 2114 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 2115 2116 if (groups < 32) 2117 groups = 32; 2118 2119 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 2120 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); 2121 if (!new) 2122 return -ENOMEM; 2123 old = nl_deref_protected(tbl->listeners); 2124 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 2125 rcu_assign_pointer(tbl->listeners, new); 2126 2127 kfree_rcu(old, rcu); 2128 } 2129 tbl->groups = groups; 2130 2131 return 0; 2132 } 2133 2134 /** 2135 * netlink_change_ngroups - change number of multicast groups 2136 * 2137 * This changes the number of multicast groups that are available 2138 * on a certain netlink family. Note that it is not possible to 2139 * change the number of groups to below 32. Also note that it does 2140 * not implicitly call netlink_clear_multicast_users() when the 2141 * number of groups is reduced. 
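 * Callers that shrink the group count are therefore expected to drop stale
 * subscribers themselves, e.g. via __netlink_clear_multicast_users() while
 * the table is grabbed (editor's note).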
2142 * 2143 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 2144 * @groups: The new number of groups. 2145 */ 2146 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 2147 { 2148 int err; 2149 2150 netlink_table_grab(); 2151 err = __netlink_change_ngroups(sk, groups); 2152 netlink_table_ungrab(); 2153 2154 return err; 2155 } 2156 2157 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 2158 { 2159 struct sock *sk; 2160 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 2161 2162 sk_for_each_bound(sk, &tbl->mc_list) 2163 netlink_update_socket_mc(nlk_sk(sk), group, 0); 2164 } 2165 2166 struct nlmsghdr * 2167 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) 2168 { 2169 struct nlmsghdr *nlh; 2170 int size = nlmsg_msg_size(len); 2171 2172 nlh = skb_put(skb, NLMSG_ALIGN(size)); 2173 nlh->nlmsg_type = type; 2174 nlh->nlmsg_len = size; 2175 nlh->nlmsg_flags = flags; 2176 nlh->nlmsg_pid = portid; 2177 nlh->nlmsg_seq = seq; 2178 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) 2179 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); 2180 return nlh; 2181 } 2182 EXPORT_SYMBOL(__nlmsg_put); 2183 2184 /* 2185 * It looks a bit ugly. 2186 * It would be better to create kernel thread. 2187 */ 2188 2189 static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, 2190 struct netlink_callback *cb, 2191 struct netlink_ext_ack *extack) 2192 { 2193 struct nlmsghdr *nlh; 2194 2195 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno), 2196 NLM_F_MULTI | cb->answer_flags); 2197 if (WARN_ON(!nlh)) 2198 return -ENOBUFS; 2199 2200 nl_dump_check_consistent(cb, nlh); 2201 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno)); 2202 2203 if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) { 2204 nlh->nlmsg_flags |= NLM_F_ACK_TLVS; 2205 if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)) 2206 nlmsg_end(skb, nlh); 2207 } 2208 2209 return 0; 2210 } 2211 2212 static int netlink_dump(struct sock *sk) 2213 { 2214 struct netlink_sock *nlk = nlk_sk(sk); 2215 struct netlink_ext_ack extack = {}; 2216 struct netlink_callback *cb; 2217 struct sk_buff *skb = NULL; 2218 struct module *module; 2219 int err = -ENOBUFS; 2220 int alloc_min_size; 2221 int alloc_size; 2222 2223 mutex_lock(nlk->cb_mutex); 2224 if (!nlk->cb_running) { 2225 err = -EINVAL; 2226 goto errout_skb; 2227 } 2228 2229 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 2230 goto errout_skb; 2231 2232 /* NLMSG_GOODSIZE is small to avoid high order allocations being 2233 * required, but it makes sense to _attempt_ a 16K bytes allocation 2234 * to reduce number of system calls on dump operations, if user 2235 * ever provided a big enough buffer. 2236 */ 2237 cb = &nlk->cb; 2238 alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); 2239 2240 if (alloc_min_size < nlk->max_recvmsg_len) { 2241 alloc_size = nlk->max_recvmsg_len; 2242 skb = alloc_skb(alloc_size, 2243 (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | 2244 __GFP_NOWARN | __GFP_NORETRY); 2245 } 2246 if (!skb) { 2247 alloc_size = alloc_min_size; 2248 skb = alloc_skb(alloc_size, GFP_KERNEL); 2249 } 2250 if (!skb) 2251 goto errout_skb; 2252 2253 /* Trim skb to allocated size. User is expected to provide buffer as 2254 * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at 2255 * netlink_recvmsg())). dump will pack as many smaller messages as 2256 * could fit within the allocated skb. 
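 * (Note: max_recvmsg_len is capped at SKB_WITH_OVERHEAD(32768) in
 * netlink_recvmsg(), so the effective ceiling is roughly 32KiB rather
 * than the 16KiB quoted above.)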
skb is typically allocated 2257 * with larger space than required (could be as much as near 2x the 2258 * requested size with align to next power of 2 approach). Allowing 2259 * dump to use the excess space makes it difficult for a user to have a 2260 * reasonable static buffer based on the expected largest dump of a 2261 * single netdev. The outcome is MSG_TRUNC error. 2262 */ 2263 skb_reserve(skb, skb_tailroom(skb) - alloc_size); 2264 netlink_skb_set_owner_r(skb, sk); 2265 2266 if (nlk->dump_done_errno > 0) { 2267 cb->extack = &extack; 2268 nlk->dump_done_errno = cb->dump(skb, cb); 2269 cb->extack = NULL; 2270 } 2271 2272 if (nlk->dump_done_errno > 0 || 2273 skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { 2274 mutex_unlock(nlk->cb_mutex); 2275 2276 if (sk_filter(sk, skb)) 2277 kfree_skb(skb); 2278 else 2279 __netlink_sendskb(sk, skb); 2280 return 0; 2281 } 2282 2283 if (netlink_dump_done(nlk, skb, cb, &extack)) 2284 goto errout_skb; 2285 2286 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 2287 /* frag_list skb's data is used for compat tasks 2288 * and the regular skb's data for normal (non-compat) tasks. 2289 * See netlink_recvmsg(). 2290 */ 2291 if (unlikely(skb_shinfo(skb)->frag_list)) { 2292 if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack)) 2293 goto errout_skb; 2294 } 2295 #endif 2296 2297 if (sk_filter(sk, skb)) 2298 kfree_skb(skb); 2299 else 2300 __netlink_sendskb(sk, skb); 2301 2302 if (cb->done) 2303 cb->done(cb); 2304 2305 nlk->cb_running = false; 2306 module = cb->module; 2307 skb = cb->skb; 2308 mutex_unlock(nlk->cb_mutex); 2309 module_put(module); 2310 consume_skb(skb); 2311 return 0; 2312 2313 errout_skb: 2314 mutex_unlock(nlk->cb_mutex); 2315 kfree_skb(skb); 2316 return err; 2317 } 2318 2319 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 2320 const struct nlmsghdr *nlh, 2321 struct netlink_dump_control *control) 2322 { 2323 struct netlink_sock *nlk, *nlk2; 2324 struct netlink_callback *cb; 2325 struct sock *sk; 2326 int ret; 2327 2328 refcount_inc(&skb->users); 2329 2330 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2331 if (sk == NULL) { 2332 ret = -ECONNREFUSED; 2333 goto error_free; 2334 } 2335 2336 nlk = nlk_sk(sk); 2337 mutex_lock(nlk->cb_mutex); 2338 /* A dump is in progress... */ 2339 if (nlk->cb_running) { 2340 ret = -EBUSY; 2341 goto error_unlock; 2342 } 2343 /* add reference of module which cb->dump belongs to */ 2344 if (!try_module_get(control->module)) { 2345 ret = -EPROTONOSUPPORT; 2346 goto error_unlock; 2347 } 2348 2349 cb = &nlk->cb; 2350 memset(cb, 0, sizeof(*cb)); 2351 cb->dump = control->dump; 2352 cb->done = control->done; 2353 cb->nlh = nlh; 2354 cb->data = control->data; 2355 cb->module = control->module; 2356 cb->min_dump_alloc = control->min_dump_alloc; 2357 cb->skb = skb; 2358 2359 nlk2 = nlk_sk(NETLINK_CB(skb).sk); 2360 cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK); 2361 2362 if (control->start) { 2363 ret = control->start(cb); 2364 if (ret) 2365 goto error_put; 2366 } 2367 2368 nlk->cb_running = true; 2369 nlk->dump_done_errno = INT_MAX; 2370 2371 mutex_unlock(nlk->cb_mutex); 2372 2373 ret = netlink_dump(sk); 2374 2375 sock_put(sk); 2376 2377 if (ret) 2378 return ret; 2379 2380 /* We successfully started a dump, by returning -EINTR we 2381 * signal not to send ACK even if it was requested. 
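 * (netlink_rcv_skb() recognises -EINTR and skips the netlink_ack() call
 * for that message.)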
2382 */
2383 return -EINTR;
2384 
2385 error_put:
2386 module_put(control->module);
2387 error_unlock:
2388 sock_put(sk);
2389 mutex_unlock(nlk->cb_mutex);
2390 error_free:
2391 kfree_skb(skb);
2392 return ret;
2393 }
2394 EXPORT_SYMBOL(__netlink_dump_start);
2395 
2396 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2397 const struct netlink_ext_ack *extack)
2398 {
2399 struct sk_buff *skb;
2400 struct nlmsghdr *rep;
2401 struct nlmsgerr *errmsg;
2402 size_t payload = sizeof(*errmsg);
2403 size_t tlvlen = 0;
2404 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
2405 unsigned int flags = 0;
2406 bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
2407 
2408 /* Error messages get the original request appended, unless the user
2409 * requests to cap the error message, and get extra error data if
2410 * requested.
2411 */
2412 if (nlk_has_extack && extack && extack->_msg)
2413 tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2414 
2415 if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
2416 payload += nlmsg_len(nlh);
2417 else
2418 flags |= NLM_F_CAPPED;
2419 if (err && nlk_has_extack && extack && extack->bad_attr)
2420 tlvlen += nla_total_size(sizeof(u32));
2421 if (nlk_has_extack && extack && extack->cookie_len)
2422 tlvlen += nla_total_size(extack->cookie_len);
2423 if (err && nlk_has_extack && extack && extack->policy)
2424 tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
2425 
2426 if (tlvlen)
2427 flags |= NLM_F_ACK_TLVS;
2428 
2429 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
2430 if (!skb) {
2431 NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
2432 NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
2433 return;
2434 }
2435 
2436 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2437 NLMSG_ERROR, payload, flags);
2438 errmsg = nlmsg_data(rep);
2439 errmsg->error = err;
2440 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ?
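/* Without NETLINK_F_CAP_ACK the payload was enlarged above, so echo the
 * whole offending request; otherwise copy just its header.
 */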
nlh->nlmsg_len : sizeof(*nlh)); 2441 2442 if (nlk_has_extack && extack) { 2443 if (extack->_msg) { 2444 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, 2445 extack->_msg)); 2446 } 2447 if (err && extack->bad_attr && 2448 !WARN_ON((u8 *)extack->bad_attr < in_skb->data || 2449 (u8 *)extack->bad_attr >= in_skb->data + 2450 in_skb->len)) 2451 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, 2452 (u8 *)extack->bad_attr - 2453 (u8 *)nlh)); 2454 if (extack->cookie_len) 2455 WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, 2456 extack->cookie_len, extack->cookie)); 2457 if (extack->policy) 2458 netlink_policy_dump_write_attr(skb, extack->policy, 2459 NLMSGERR_ATTR_POLICY); 2460 } 2461 2462 nlmsg_end(skb, rep); 2463 2464 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); 2465 } 2466 EXPORT_SYMBOL(netlink_ack); 2467 2468 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 2469 struct nlmsghdr *, 2470 struct netlink_ext_ack *)) 2471 { 2472 struct netlink_ext_ack extack; 2473 struct nlmsghdr *nlh; 2474 int err; 2475 2476 while (skb->len >= nlmsg_total_size(0)) { 2477 int msglen; 2478 2479 memset(&extack, 0, sizeof(extack)); 2480 nlh = nlmsg_hdr(skb); 2481 err = 0; 2482 2483 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 2484 return 0; 2485 2486 /* Only requests are handled by the kernel */ 2487 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 2488 goto ack; 2489 2490 /* Skip control messages */ 2491 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 2492 goto ack; 2493 2494 err = cb(skb, nlh, &extack); 2495 if (err == -EINTR) 2496 goto skip; 2497 2498 ack: 2499 if (nlh->nlmsg_flags & NLM_F_ACK || err) 2500 netlink_ack(skb, nlh, err, &extack); 2501 2502 skip: 2503 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 2504 if (msglen > skb->len) 2505 msglen = skb->len; 2506 skb_pull(skb, msglen); 2507 } 2508 2509 return 0; 2510 } 2511 EXPORT_SYMBOL(netlink_rcv_skb); 2512 2513 /** 2514 * nlmsg_notify - send a notification netlink message 2515 * @sk: netlink socket to use 2516 * @skb: notification message 2517 * @portid: destination netlink portid for reports or 0 2518 * @group: destination multicast group or 0 2519 * @report: 1 to report back, 0 to disable 2520 * @flags: allocation flags 2521 */ 2522 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, 2523 unsigned int group, int report, gfp_t flags) 2524 { 2525 int err = 0; 2526 2527 if (group) { 2528 int exclude_portid = 0; 2529 2530 if (report) { 2531 refcount_inc(&skb->users); 2532 exclude_portid = portid; 2533 } 2534 2535 /* errors reported via destination sk->sk_err, but propagate 2536 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ 2537 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); 2538 } 2539 2540 if (report) { 2541 int err2; 2542 2543 err2 = nlmsg_unicast(sk, skb, portid); 2544 if (!err || err == -ESRCH) 2545 err = err2; 2546 } 2547 2548 return err; 2549 } 2550 EXPORT_SYMBOL(nlmsg_notify); 2551 2552 #ifdef CONFIG_PROC_FS 2553 struct nl_seq_iter { 2554 struct seq_net_private p; 2555 struct rhashtable_iter hti; 2556 int link; 2557 }; 2558 2559 static void netlink_walk_start(struct nl_seq_iter *iter) 2560 { 2561 rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti); 2562 rhashtable_walk_start(&iter->hti); 2563 } 2564 2565 static void netlink_walk_stop(struct nl_seq_iter *iter) 2566 { 2567 rhashtable_walk_stop(&iter->hti); 2568 rhashtable_walk_exit(&iter->hti); 2569 } 2570 2571 static void *__netlink_seq_next(struct seq_file *seq) 2572 { 2573 struct nl_seq_iter *iter = seq->private; 2574 
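/* Step the rhashtable walker below: retry on -EAGAIN (a resize is in
 * flight), move on to the next protocol's table once this one is
 * exhausted, and skip sockets that belong to other network namespaces.
 */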
struct netlink_sock *nlk; 2575 2576 do { 2577 for (;;) { 2578 nlk = rhashtable_walk_next(&iter->hti); 2579 2580 if (IS_ERR(nlk)) { 2581 if (PTR_ERR(nlk) == -EAGAIN) 2582 continue; 2583 2584 return nlk; 2585 } 2586 2587 if (nlk) 2588 break; 2589 2590 netlink_walk_stop(iter); 2591 if (++iter->link >= MAX_LINKS) 2592 return NULL; 2593 2594 netlink_walk_start(iter); 2595 } 2596 } while (sock_net(&nlk->sk) != seq_file_net(seq)); 2597 2598 return nlk; 2599 } 2600 2601 static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) 2602 __acquires(RCU) 2603 { 2604 struct nl_seq_iter *iter = seq->private; 2605 void *obj = SEQ_START_TOKEN; 2606 loff_t pos; 2607 2608 iter->link = 0; 2609 2610 netlink_walk_start(iter); 2611 2612 for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) 2613 obj = __netlink_seq_next(seq); 2614 2615 return obj; 2616 } 2617 2618 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2619 { 2620 ++*pos; 2621 return __netlink_seq_next(seq); 2622 } 2623 2624 static void netlink_native_seq_stop(struct seq_file *seq, void *v) 2625 { 2626 struct nl_seq_iter *iter = seq->private; 2627 2628 if (iter->link >= MAX_LINKS) 2629 return; 2630 2631 netlink_walk_stop(iter); 2632 } 2633 2634 2635 static int netlink_native_seq_show(struct seq_file *seq, void *v) 2636 { 2637 if (v == SEQ_START_TOKEN) { 2638 seq_puts(seq, 2639 "sk Eth Pid Groups " 2640 "Rmem Wmem Dump Locks Drops Inode\n"); 2641 } else { 2642 struct sock *s = v; 2643 struct netlink_sock *nlk = nlk_sk(s); 2644 2645 seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n", 2646 s, 2647 s->sk_protocol, 2648 nlk->portid, 2649 nlk->groups ? (u32)nlk->groups[0] : 0, 2650 sk_rmem_alloc_get(s), 2651 sk_wmem_alloc_get(s), 2652 nlk->cb_running, 2653 refcount_read(&s->sk_refcnt), 2654 atomic_read(&s->sk_drops), 2655 sock_i_ino(s) 2656 ); 2657 2658 } 2659 return 0; 2660 } 2661 2662 #ifdef CONFIG_BPF_SYSCALL 2663 struct bpf_iter__netlink { 2664 __bpf_md_ptr(struct bpf_iter_meta *, meta); 2665 __bpf_md_ptr(struct netlink_sock *, sk); 2666 }; 2667 2668 DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk) 2669 2670 static int netlink_prog_seq_show(struct bpf_prog *prog, 2671 struct bpf_iter_meta *meta, 2672 void *v) 2673 { 2674 struct bpf_iter__netlink ctx; 2675 2676 meta->seq_num--; /* skip SEQ_START_TOKEN */ 2677 ctx.meta = meta; 2678 ctx.sk = nlk_sk((struct sock *)v); 2679 return bpf_iter_run_prog(prog, &ctx); 2680 } 2681 2682 static int netlink_seq_show(struct seq_file *seq, void *v) 2683 { 2684 struct bpf_iter_meta meta; 2685 struct bpf_prog *prog; 2686 2687 meta.seq = seq; 2688 prog = bpf_iter_get_info(&meta, false); 2689 if (!prog) 2690 return netlink_native_seq_show(seq, v); 2691 2692 if (v != SEQ_START_TOKEN) 2693 return netlink_prog_seq_show(prog, &meta, v); 2694 2695 return 0; 2696 } 2697 2698 static void netlink_seq_stop(struct seq_file *seq, void *v) 2699 { 2700 struct bpf_iter_meta meta; 2701 struct bpf_prog *prog; 2702 2703 if (!v) { 2704 meta.seq = seq; 2705 prog = bpf_iter_get_info(&meta, true); 2706 if (prog) 2707 (void)netlink_prog_seq_show(prog, &meta, v); 2708 } 2709 2710 netlink_native_seq_stop(seq, v); 2711 } 2712 #else 2713 static int netlink_seq_show(struct seq_file *seq, void *v) 2714 { 2715 return netlink_native_seq_show(seq, v); 2716 } 2717 2718 static void netlink_seq_stop(struct seq_file *seq, void *v) 2719 { 2720 netlink_native_seq_stop(seq, v); 2721 } 2722 #endif 2723 2724 static const struct seq_operations netlink_seq_ops = { 2725 .start = 
netlink_seq_start, 2726 .next = netlink_seq_next, 2727 .stop = netlink_seq_stop, 2728 .show = netlink_seq_show, 2729 }; 2730 #endif 2731 2732 int netlink_register_notifier(struct notifier_block *nb) 2733 { 2734 return blocking_notifier_chain_register(&netlink_chain, nb); 2735 } 2736 EXPORT_SYMBOL(netlink_register_notifier); 2737 2738 int netlink_unregister_notifier(struct notifier_block *nb) 2739 { 2740 return blocking_notifier_chain_unregister(&netlink_chain, nb); 2741 } 2742 EXPORT_SYMBOL(netlink_unregister_notifier); 2743 2744 static const struct proto_ops netlink_ops = { 2745 .family = PF_NETLINK, 2746 .owner = THIS_MODULE, 2747 .release = netlink_release, 2748 .bind = netlink_bind, 2749 .connect = netlink_connect, 2750 .socketpair = sock_no_socketpair, 2751 .accept = sock_no_accept, 2752 .getname = netlink_getname, 2753 .poll = datagram_poll, 2754 .ioctl = netlink_ioctl, 2755 .listen = sock_no_listen, 2756 .shutdown = sock_no_shutdown, 2757 .setsockopt = netlink_setsockopt, 2758 .getsockopt = netlink_getsockopt, 2759 .sendmsg = netlink_sendmsg, 2760 .recvmsg = netlink_recvmsg, 2761 .mmap = sock_no_mmap, 2762 .sendpage = sock_no_sendpage, 2763 }; 2764 2765 static const struct net_proto_family netlink_family_ops = { 2766 .family = PF_NETLINK, 2767 .create = netlink_create, 2768 .owner = THIS_MODULE, /* for consistency 8) */ 2769 }; 2770 2771 static int __net_init netlink_net_init(struct net *net) 2772 { 2773 #ifdef CONFIG_PROC_FS 2774 if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops, 2775 sizeof(struct nl_seq_iter))) 2776 return -ENOMEM; 2777 #endif 2778 return 0; 2779 } 2780 2781 static void __net_exit netlink_net_exit(struct net *net) 2782 { 2783 #ifdef CONFIG_PROC_FS 2784 remove_proc_entry("netlink", net->proc_net); 2785 #endif 2786 } 2787 2788 static void __init netlink_add_usersock_entry(void) 2789 { 2790 struct listeners *listeners; 2791 int groups = 32; 2792 2793 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2794 if (!listeners) 2795 panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); 2796 2797 netlink_table_grab(); 2798 2799 nl_table[NETLINK_USERSOCK].groups = groups; 2800 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); 2801 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2802 nl_table[NETLINK_USERSOCK].registered = 1; 2803 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; 2804 2805 netlink_table_ungrab(); 2806 } 2807 2808 static struct pernet_operations __net_initdata netlink_net_ops = { 2809 .init = netlink_net_init, 2810 .exit = netlink_net_exit, 2811 }; 2812 2813 static inline u32 netlink_hash(const void *data, u32 len, u32 seed) 2814 { 2815 const struct netlink_sock *nlk = data; 2816 struct netlink_compare_arg arg; 2817 2818 netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); 2819 return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); 2820 } 2821 2822 static const struct rhashtable_params netlink_rhashtable_params = { 2823 .head_offset = offsetof(struct netlink_sock, node), 2824 .key_len = netlink_compare_arg_len, 2825 .obj_hashfn = netlink_hash, 2826 .obj_cmpfn = netlink_compare, 2827 .automatic_shrinking = true, 2828 }; 2829 2830 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2831 BTF_ID_LIST(btf_netlink_sock_id) 2832 BTF_ID(struct, netlink_sock) 2833 2834 static const struct bpf_iter_seq_info netlink_seq_info = { 2835 .seq_ops = &netlink_seq_ops, 2836 .init_seq_private = bpf_iter_init_seq_net, 2837 .fini_seq_private = bpf_iter_fini_seq_net, 
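/* BPF iterators reuse the /proc walker's state, struct nl_seq_iter. */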
2838 .seq_priv_size = sizeof(struct nl_seq_iter), 2839 }; 2840 2841 static struct bpf_iter_reg netlink_reg_info = { 2842 .target = "netlink", 2843 .ctx_arg_info_size = 1, 2844 .ctx_arg_info = { 2845 { offsetof(struct bpf_iter__netlink, sk), 2846 PTR_TO_BTF_ID_OR_NULL }, 2847 }, 2848 .seq_info = &netlink_seq_info, 2849 }; 2850 2851 static int __init bpf_iter_register(void) 2852 { 2853 netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; 2854 return bpf_iter_reg_target(&netlink_reg_info); 2855 } 2856 #endif 2857 2858 static int __init netlink_proto_init(void) 2859 { 2860 int i; 2861 int err = proto_register(&netlink_proto, 0); 2862 2863 if (err != 0) 2864 goto out; 2865 2866 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2867 err = bpf_iter_register(); 2868 if (err) 2869 goto out; 2870 #endif 2871 2872 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb)); 2873 2874 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 2875 if (!nl_table) 2876 goto panic; 2877 2878 for (i = 0; i < MAX_LINKS; i++) { 2879 if (rhashtable_init(&nl_table[i].hash, 2880 &netlink_rhashtable_params) < 0) { 2881 while (--i > 0) 2882 rhashtable_destroy(&nl_table[i].hash); 2883 kfree(nl_table); 2884 goto panic; 2885 } 2886 } 2887 2888 netlink_add_usersock_entry(); 2889 2890 sock_register(&netlink_family_ops); 2891 register_pernet_subsys(&netlink_net_ops); 2892 register_pernet_subsys(&netlink_tap_net_ops); 2893 /* The netlink device handler may be needed early. */ 2894 rtnetlink_init(); 2895 out: 2896 return err; 2897 panic: 2898 panic("netlink_init: Cannot allocate nl_table\n"); 2899 } 2900 2901 core_initcall(netlink_proto_init); 2902
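/*
 * Editor's illustrative sketch, not part of af_netlink.c: a minimal
 * in-kernel user of the API exported above. It creates a kernel-side
 * netlink socket with netlink_kernel_create(), echoes every request back
 * to the sender with nlmsg_put()/nlmsg_unicast(), and tears the socket
 * down with netlink_kernel_release(). The example_* identifiers and the
 * protocol number 31 are placeholders, not existing kernel names; the
 * block is guarded by #if 0 so it cannot affect a real build. Userspace
 * would reach such a socket via socket(AF_NETLINK, SOCK_RAW, 31) and
 * exchange nlmsghdr-framed buffers over sendmsg()/recvmsg().
 */
#if 0
#include <linux/module.h>
#include <linux/string.h>
#include <linux/netlink.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#define EXAMPLE_NETLINK_PROTO 31	/* placeholder unit, must be < MAX_LINKS */

static struct sock *example_nlsk;

/* Runs in the context of the sending task for every message unicast to
 * the kernel socket (see netlink_unicast_kernel()).
 */
static void example_input(struct sk_buff *skb)
{
	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	u32 portid = NETLINK_CB(skb).portid;
	struct sk_buff *reply;
	struct nlmsghdr *rep;

	if (!nlmsg_ok(nlh, skb->len))
		return;

	reply = nlmsg_new(nlmsg_len(nlh), GFP_KERNEL);
	if (!reply)
		return;

	/* Echo the payload back with the original type and sequence number. */
	rep = nlmsg_put(reply, 0, nlh->nlmsg_seq, nlh->nlmsg_type,
			nlmsg_len(nlh), 0);
	if (!rep) {
		nlmsg_free(reply);
		return;
	}
	memcpy(nlmsg_data(rep), nlmsg_data(nlh), nlmsg_len(nlh));
	nlmsg_end(reply, rep);

	nlmsg_unicast(example_nlsk, reply, portid);
}

static int __init example_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.groups	= 0,
		.input	= example_input,
	};

	example_nlsk = netlink_kernel_create(&init_net, EXAMPLE_NETLINK_PROTO,
					     &cfg);
	return example_nlsk ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	netlink_kernel_release(example_nlsk);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
#endif /* editor's sketch */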