/*
 * NETLINK	Kernel-user communication protocol.
 *
 * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 * 				Patrick McHardy <kaber@trash.net>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 * 				- inc module use count of module that owns
 * 				  the kernel socket in case userspace opens
 * 				  socket of same protocol
 * 				- remove all module support, since netlink is
 * 				  mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/if_arp.h>
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
#include <linux/genetlink.h>
#include <linux/net_namespace.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#include "af_netlink.h"

struct listeners {
	struct rcu_head		rcu;
	unsigned long		masks[0];
};

/* state bits */
#define NETLINK_S_CONGESTED		0x0

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}

struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];

static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
	"nlk_cb_mutex-ROUTE",
	"nlk_cb_mutex-1",
	"nlk_cb_mutex-USERSOCK",
	"nlk_cb_mutex-FIREWALL",
	"nlk_cb_mutex-SOCK_DIAG",
	"nlk_cb_mutex-NFLOG",
	"nlk_cb_mutex-XFRM",
	"nlk_cb_mutex-SELINUX",
	"nlk_cb_mutex-ISCSI",
	"nlk_cb_mutex-AUDIT",
	"nlk_cb_mutex-FIB_LOOKUP",
	"nlk_cb_mutex-CONNECTOR",
	"nlk_cb_mutex-NETFILTER",
	"nlk_cb_mutex-IP6_FW",
	"nlk_cb_mutex-DNRTMSG",
	"nlk_cb_mutex-KOBJECT_UEVENT",
	"nlk_cb_mutex-GENERIC",
	"nlk_cb_mutex-17",
	"nlk_cb_mutex-SCSITRANSPORT",
	"nlk_cb_mutex-ECRYPTFS",
	"nlk_cb_mutex-RDMA",
	"nlk_cb_mutex-CRYPTO",
	"nlk_cb_mutex-SMC",
	"nlk_cb_mutex-23",
	"nlk_cb_mutex-24",
	"nlk_cb_mutex-25",
	"nlk_cb_mutex-26",
	"nlk_cb_mutex-27",
	"nlk_cb_mutex-28",
	"nlk_cb_mutex-29",
	"nlk_cb_mutex-30",
	"nlk_cb_mutex-31",
	"nlk_cb_mutex-MAX_LINKS"
};

static int netlink_dump(struct sock *sk);

/* nl_table locking explained:
 * Lookup and traversal are protected with an RCU read-side lock. Insertion
 * and removal are protected with per bucket lock while using RCU list
 * modification primitives and may run in parallel to RCU protected lookups.
 * Destruction of the Netlink socket may only occur *after* nl_table_lock has
 * been acquired - either during or after the socket has been removed from
 * the list - and after an RCU grace period.
 */
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) \
	rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));

static BLOCKING_NOTIFIER_HEAD(netlink_chain);

static DEFINE_SPINLOCK(netlink_tap_lock);
static struct list_head netlink_tap_all __read_mostly;

static const struct rhashtable_params netlink_rhashtable_params;

static inline u32 netlink_group_mask(u32 group)
{
	return group ? 1 << (group - 1) : 0;
}

static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
					   gfp_t gfp_mask)
{
	unsigned int len = skb_end_offset(skb);
	struct sk_buff *new;

	new = alloc_skb(len, gfp_mask);
	if (new == NULL)
		return NULL;

	NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
	NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
	NETLINK_CB(new).creds = NETLINK_CB(skb).creds;

	skb_put_data(new, skb->data, len);
	return new;
}
int netlink_add_tap(struct netlink_tap *nt)
{
	if (unlikely(nt->dev->type != ARPHRD_NETLINK))
		return -EINVAL;

	spin_lock(&netlink_tap_lock);
	list_add_rcu(&nt->list, &netlink_tap_all);
	spin_unlock(&netlink_tap_lock);

	__module_get(nt->module);

	return 0;
}
EXPORT_SYMBOL_GPL(netlink_add_tap);

static int __netlink_remove_tap(struct netlink_tap *nt)
{
	bool found = false;
	struct netlink_tap *tmp;

	spin_lock(&netlink_tap_lock);

	list_for_each_entry(tmp, &netlink_tap_all, list) {
		if (nt == tmp) {
			list_del_rcu(&nt->list);
			found = true;
			goto out;
		}
	}

	pr_warn("__netlink_remove_tap: %p not found\n", nt);
out:
	spin_unlock(&netlink_tap_lock);

	if (found)
		module_put(nt->module);

	return found ? 0 : -ENODEV;
}

int netlink_remove_tap(struct netlink_tap *nt)
{
	int ret;

	ret = __netlink_remove_tap(nt);
	synchronize_net();

	return ret;
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);
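/* Example (illustrative sketch, not part of this file): a packet-tap
 * module such as nlmon registers a netdevice of type ARPHRD_NETLINK
 * and hooks it up with netlink_add_tap(). "nlmon_dev" here is a
 * hypothetical, already-registered device:
 *
 *	static struct netlink_tap nlmon_tap = {
 *		.dev	= nlmon_dev,	// must be ARPHRD_NETLINK
 *		.module	= THIS_MODULE,
 *	};
 *
 *	err = netlink_add_tap(&nlmon_tap);	// -EINVAL on wrong dev type
 *	...
 *	netlink_remove_tap(&nlmon_tap);		// synchronize_net() waits
 *						// for in-flight RCU readers
 */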
static bool netlink_filter_tap(const struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* We take the more conservative approach and
	 * whitelist socket protocols that may pass.
	 */
	switch (sk->sk_protocol) {
	case NETLINK_ROUTE:
	case NETLINK_USERSOCK:
	case NETLINK_SOCK_DIAG:
	case NETLINK_NFLOG:
	case NETLINK_XFRM:
	case NETLINK_FIB_LOOKUP:
	case NETLINK_NETFILTER:
	case NETLINK_GENERIC:
		return true;
	}

	return false;
}

static int __netlink_deliver_tap_skb(struct sk_buff *skb,
				     struct net_device *dev)
{
	struct sk_buff *nskb;
	struct sock *sk = skb->sk;
	int ret = -ENOMEM;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		return 0;

	dev_hold(dev);

	if (is_vmalloc_addr(skb->head))
		nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
	else
		nskb = skb_clone(skb, GFP_ATOMIC);
	if (nskb) {
		nskb->dev = dev;
		nskb->protocol = htons((u16) sk->sk_protocol);
		nskb->pkt_type = netlink_is_kernel(sk) ?
				 PACKET_KERNEL : PACKET_USER;
		skb_reset_network_header(nskb);
		ret = dev_queue_xmit(nskb);
		if (unlikely(ret > 0))
			ret = net_xmit_errno(ret);
	}

	dev_put(dev);
	return ret;
}

static void __netlink_deliver_tap(struct sk_buff *skb)
{
	int ret;
	struct netlink_tap *tmp;

	if (!netlink_filter_tap(skb))
		return;

	list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
		ret = __netlink_deliver_tap_skb(skb, tmp->dev);
		if (unlikely(ret))
			break;
	}
}

static void netlink_deliver_tap(struct sk_buff *skb)
{
	rcu_read_lock();

	if (unlikely(!list_empty(&netlink_tap_all)))
		__netlink_deliver_tap(skb);

	rcu_read_unlock();
}

static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
				       struct sk_buff *skb)
{
	if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
		netlink_deliver_tap(skb);
}

static void netlink_overrun(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
		if (!test_and_set_bit(NETLINK_S_CONGESTED,
				      &nlk_sk(sk)->state)) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
		}
	}
	atomic_inc(&sk->sk_drops);
}

static void netlink_rcv_wake(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (skb_queue_empty(&sk->sk_receive_queue))
		clear_bit(NETLINK_S_CONGESTED, &nlk->state);
	if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
		wake_up_interruptible(&nlk->wait);
}

static void netlink_skb_destructor(struct sk_buff *skb)
{
	if (is_vmalloc_addr(skb->head)) {
		if (!skb->cloned ||
		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
			vfree(skb->head);

		skb->head = NULL;
	}
	if (skb->sk != NULL)
		sock_rfree(skb);
}

static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	WARN_ON(skb->sk != NULL);
	skb->sk = sk;
	skb->destructor = netlink_skb_destructor;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, skb->truesize);
}

static void netlink_sock_destruct(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->cb_running) {
		if (nlk->cb.done)
			nlk->cb.done(&nlk->cb);
		module_put(nlk->cb.module);
		kfree_skb(nlk->cb.skb);
	}

	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
		return;
	}

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(nlk_sk(sk)->groups);
}

static void netlink_sock_destruct_work(struct work_struct *work)
{
	struct netlink_sock *nlk = container_of(work, struct netlink_sock,
						work);

	sk_free(&nlk->sk);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	might_sleep();

	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}

struct netlink_compare_arg
{
	possible_net_t pnet;
	u32 portid;
};

/* Doing sizeof directly may yield 4 extra bytes on 64-bit. */
#define netlink_compare_arg_len \
	(offsetof(struct netlink_compare_arg, portid) + sizeof(u32))

static inline int netlink_compare(struct rhashtable_compare_arg *arg,
				  const void *ptr)
{
	const struct netlink_compare_arg *x = arg->key;
	const struct netlink_sock *nlk = ptr;

	return nlk->portid != x->portid ||
	       !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
}

static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
				     struct net *net, u32 portid)
{
	memset(arg, 0, sizeof(*arg));
	write_pnet(&arg->pnet, net);
	arg->portid = portid;
}

static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
				     struct net *net)
{
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, net, portid);
	return rhashtable_lookup_fast(&table->hash, &arg,
				      netlink_rhashtable_params);
}

static int __netlink_insert(struct netlink_table *table, struct sock *sk)
{
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
	return rhashtable_lookup_insert_key(&table->hash, &arg,
					    &nlk_sk(sk)->node,
					    netlink_rhashtable_params);
}

static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
	struct netlink_table *table = &nl_table[protocol];
	struct sock *sk;

	rcu_read_lock();
	sk = __netlink_lookup(table, portid, net);
	if (sk)
		sock_hold(sk);
	rcu_read_unlock();

	return sk;
}

static const struct proto_ops netlink_ops;
static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	unsigned long mask;
	unsigned int i;
	struct listeners *listeners;

	listeners = nl_deref_protected(tbl->listeners);
	if (!listeners)
		return;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, &tbl->mc_list) {
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		listeners->masks[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, u32 portid)
{
	struct netlink_table *table = &nl_table[sk->sk_protocol];
	int err;

	lock_sock(sk);

	err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
	if (nlk_sk(sk)->bound)
		goto err;

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 &&
	    unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
		goto err;

	nlk_sk(sk)->portid = portid;
	sock_hold(sk);

	err = __netlink_insert(table, sk);
	if (err) {
		/* In case the hashtable backend returns with -EBUSY
		 * from here, it must not escape to the caller.
		 */
		if (unlikely(err == -EBUSY))
			err = -EOVERFLOW;
		if (err == -EEXIST)
			err = -EADDRINUSE;
		sock_put(sk);
		goto err;
	}

	/* We need to ensure that the socket is hashed and visible. */
	smp_wmb();
	nlk_sk(sk)->bound = portid;

err:
	release_sock(sk);
	return err;
}

static void netlink_remove(struct sock *sk)
{
	struct netlink_table *table;

	table = &nl_table[sk->sk_protocol];
	if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
				    netlink_rhashtable_params)) {
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}

	netlink_table_grab();
	if (nlk_sk(sk)->subscriptions) {
		__sk_del_bind_node(sk);
		netlink_update_listeners(sk);
	}
	if (sk->sk_protocol == NETLINK_GENERIC)
		atomic_inc(&genl_sk_destructing_cnt);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol,
			    int kern)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex) {
		nlk->cb_mutex = cb_mutex;
	} else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
		lockdep_set_class_and_name(nlk->cb_mutex,
					   nlk_cb_mutex_keys + protocol,
					   nlk_cb_mutex_key_strings[protocol]);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}
static int netlink_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int (*bind)(struct net *net, int group);
	void (*unbind)(struct net *net, int group);
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	netlink_lock_table();
#ifdef CONFIG_MODULES
	if (!nl_table[protocol].registered) {
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	else
		err = -EPROTONOSUPPORT;
	cb_mutex = nl_table[protocol].cb_mutex;
	bind = nl_table[protocol].bind;
	unbind = nl_table[protocol].unbind;
	netlink_unlock_table();

	if (err < 0)
		goto out;

	err = __netlink_create(net, sock, cb_mutex, protocol, kern);
	if (err < 0)
		goto out_module;

	local_bh_disable();
	sock_prot_inuse_add(net, &netlink_proto, 1);
	local_bh_enable();

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
	nlk->netlink_bind = bind;
	nlk->netlink_unbind = unbind;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}
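/* Example (illustrative): the request_module() above resolves to the
 * module alias "net-pf-16-proto-<protocol>", so a module implementing
 * a netlink family can make itself autoloadable with the helper from
 * <linux/net.h>:
 *
 *	MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_CRYPTO);
 *
 * (crypto_user, for instance, does this for NETLINK_CRYPTO.)
 */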
static void deferred_put_nlk_sk(struct rcu_head *head)
{
	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
	struct sock *sk = &nlk->sk;

	kfree(nlk->groups);
	nlk->groups = NULL;

	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	if (nlk->cb_running && nlk->cb.done) {
		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
		schedule_work(&nlk->work);
		return;
	}

	sk_free(sk);
}

static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */

	/* must not acquire netlink_table_lock in any way again before unbind
	 * and notifying genetlink is done as otherwise it might deadlock
	 */
	if (nlk->netlink_unbind) {
		int i;

		for (i = 0; i < nlk->ngroups; i++)
			if (test_bit(i, nlk->groups))
				nlk->netlink_unbind(sock_net(sk), i + 1);
	}
	if (sk->sk_protocol == NETLINK_GENERIC &&
	    atomic_dec_return(&genl_sk_destructing_cnt) == 0)
		wake_up(&genl_sk_destructing_waitq);

	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);

	skb_queue_purge(&sk->sk_write_queue);

	if (nlk->portid && nlk->bound) {
		struct netlink_notify n = {
						.net = sock_net(sk),
						.protocol = sk->sk_protocol,
						.portid = nlk->portid,
					  };
		blocking_notifier_call_chain(&netlink_chain,
				NETLINK_URELEASE, &n);
	}

	module_put(nlk->module);

	if (netlink_is_kernel(sk)) {
		netlink_table_grab();
		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
		if (--nl_table[sk->sk_protocol].registered == 0) {
			struct listeners *old;

			old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
			RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
			kfree_rcu(old, rcu);
			nl_table[sk->sk_protocol].module = NULL;
			nl_table[sk->sk_protocol].bind = NULL;
			nl_table[sk->sk_protocol].unbind = NULL;
			nl_table[sk->sk_protocol].flags = 0;
			nl_table[sk->sk_protocol].registered = 0;
		}
		netlink_table_ungrab();
	}

	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
	local_bh_enable();
	call_rcu(&nlk->rcu, deferred_put_nlk_sk);
	return 0;
}

static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_table *table = &nl_table[sk->sk_protocol];
	s32 portid = task_tgid_vnr(current);
	int err;
	s32 rover = -4096;
	bool ok;

retry:
	cond_resched();
	rcu_read_lock();
	ok = !__netlink_lookup(table, portid, net);
	rcu_read_unlock();
	if (!ok) {
		/* Bind collision, search negative portid values. */
		if (rover == -4096)
			/* rover will be in range [S32_MIN, -4097] */
			rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
		else if (rover >= -4096)
			rover = -4097;
		portid = rover--;
		goto retry;
	}

	err = netlink_insert(sk, portid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine. */
	if (err == -EBUSY)
		err = 0;

	return err;
}
/**
 * __netlink_ns_capable - General netlink message capability test
 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap in the user
 * namespace @user_ns.
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
			  struct user_namespace *user_ns, int cap)
{
	return ((nsp->flags & NETLINK_SKB_DST) ||
		file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
	       ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);

/**
 * netlink_ns_capable - General netlink message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap in the user
 * namespace @user_ns.
 */
bool netlink_ns_capable(const struct sk_buff *skb,
			struct user_namespace *user_ns, int cap)
{
	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
}
EXPORT_SYMBOL(netlink_ns_capable);

/**
 * netlink_capable - Netlink global message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap in all user
 * namespaces.
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and the sender of the message has the capability @cap over the
 * network namespace of the socket we received the message from.
 */
bool netlink_net_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
}
EXPORT_SYMBOL(netlink_net_capable);

static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
		ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
out_unlock:
	netlink_table_ungrab();
	return err;
}

static void netlink_undo_bind(int group, long unsigned int groups,
			      struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int undo;

	if (!nlk->netlink_unbind)
		return;

	for (undo = 0; undo < group; undo++)
		if (test_bit(undo, &groups))
			nlk->netlink_unbind(sock_net(sk), undo + 1);
}
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err = 0;
	long unsigned int groups = nladdr->nl_groups;
	bool bound;

	if (addr_len < sizeof(struct sockaddr_nl))
		return -EINVAL;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen to multicasts */
	if (groups) {
		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	bound = nlk->bound;
	if (bound) {
		/* Ensure nlk->portid is up-to-date. */
		smp_rmb();

		if (nladdr->nl_pid != nlk->portid)
			return -EINVAL;
	}

	netlink_lock_table();
	if (nlk->netlink_bind && groups) {
		int group;

		for (group = 0; group < nlk->ngroups; group++) {
			if (!test_bit(group, &groups))
				continue;
			err = nlk->netlink_bind(net, group + 1);
			if (!err)
				continue;
			netlink_undo_bind(group, groups, sk);
			goto unlock;
		}
	}

	/* No need for barriers here as we return to user-space without
	 * using any of the bound attributes.
	 */
	if (!bound) {
		err = nladdr->nl_pid ?
			netlink_insert(sk, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err) {
			netlink_undo_bind(nlk->ngroups, groups, sk);
			goto unlock;
		}
	}

	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		goto unlock;
	netlink_unlock_table();

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;

unlock:
	netlink_unlock_table();
	return err;
}
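/* Example (illustrative, userspace): binding with nl_pid == 0 triggers
 * netlink_autobind() above; a non-zero nl_groups bitmask subscribes to
 * the corresponding multicast groups, subject to NL_CFG_F_NONROOT_RECV
 * or CAP_NET_ADMIN:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl sa = {
 *		.nl_family = AF_NETLINK,
 *		.nl_pid    = 0,			// kernel picks a port id
 *		.nl_groups = RTMGRP_LINK,	// group bitmask (group 1)
 *	};
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */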
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (alen < sizeof(addr->sa_family))
		return -EINVAL;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state	= NETLINK_UNCONNECTED;
		nlk->dst_portid	= 0;
		nlk->dst_group  = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	if ((nladdr->nl_groups || nladdr->nl_pid) &&
	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
		return -EPERM;

	/* No need for barriers here as we return to user-space without
	 * using any of the bound attributes.
	 */
	if (!nlk->bound)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state	= NETLINK_CONNECTED;
		nlk->dst_portid = nladdr->nl_pid;
		nlk->dst_group  = ffs(nladdr->nl_groups);
	}

	return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_portid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->portid;
		netlink_lock_table();
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
		netlink_unlock_table();
	}
	return 0;
}

static int netlink_ioctl(struct socket *sock, unsigned int cmd,
			 unsigned long arg)
{
	/* try to hand this ioctl down to the NIC drivers.
	 */
	return -ENOIOCTLCMD;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_portid != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
					       int broadcast)
{
	struct sk_buff *skb;
	void *data;

	if (size <= NLMSG_GOODSIZE || broadcast)
		return alloc_skb(size, GFP_KERNEL);

	size = SKB_DATA_ALIGN(size) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	data = vmalloc(size);
	if (data == NULL)
		return NULL;

	skb = __build_skb(data, size);
	if (skb == NULL)
		vfree(data);
	else
		skb->destructor = netlink_skb_destructor;

	return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
		      long *timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
		DECLARE_WAITQUEUE(wait, current);
		if (!*timeo) {
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			*timeo = schedule_timeout(*timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(*timeo);
		}
		return 1;
	}
	netlink_skb_set_owner_r(skb, sk);
	return 0;
}

static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;

	netlink_deliver_tap(skb);

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk);
	return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = __netlink_sendskb(sk, skb);

	sock_put(sk);
	return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
	int delta;

	WARN_ON(skb->sk != NULL);
	delta = skb->end - skb->tail;
	if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
		return skb;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		consume_skb(skb);
		skb = nskb;
	}

	pskb_expand_head(skb, 0, -delta,
			 (allocation & ~__GFP_DIRECT_RECLAIM) |
			 __GFP_NOWARN | __GFP_NORETRY);
	return skb;
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
				  struct sock *ssk)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		netlink_skb_set_owner_r(skb, sk);
		NETLINK_CB(skb).sk = ssk;
		netlink_deliver_tap_kernel(sk, ssk, skb);
		nlk->netlink_rcv(skb);
		consume_skb(skb);
	} else {
		kfree_skb(skb);
	}
	sock_put(sk);
	return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 portid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbyportid(ssk, portid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb, ssk);

	if (sk_filter(sk, skb)) {
		err = skb->len;
		kfree_skb(skb);
		sock_put(sk);
		return err;
	}

	err = netlink_attachskb(sk, skb, &timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);
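/* Example (illustrative sketch): a kernel caller typically builds a
 * message with nlmsg_new()/nlmsg_put() and hands it to
 * netlink_unicast(), which consumes the skb on both success and
 * failure. "my_sk" is a hypothetical kernel socket and "portid" would
 * come from NETLINK_CB(request_skb).portid:
 *
 *	struct sk_buff *skb = nlmsg_new(payload_len, GFP_KERNEL);
 *	struct nlmsghdr *nlh;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	// cannot fail: the skb was sized for exactly this payload
 *	nlh = nlmsg_put(skb, 0, seq, MY_MSG_TYPE, payload_len, 0);
 *	memcpy(nlmsg_data(nlh), payload, payload_len);
 *	return netlink_unicast(my_sk, skb, portid, MSG_DONTWAIT);
 */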
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	struct listeners *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners->masks);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
		netlink_skb_set_owner_r(skb, sk);
		__netlink_sendskb(sk, skb);
		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
	}
	return -1;
}

struct netlink_broadcast_data {
	struct sock *exclude_sk;
	struct net *net;
	u32 portid;
	u32 group;
	int failure;
	int delivery_failure;
	int congested;
	int delivered;
	gfp_t allocation;
	struct sk_buff *skb, *skb2;
	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
	void *tx_data;
};

static void do_one_broadcast(struct sock *sk,
			     struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		return;

	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		return;

	if (!net_eq(sock_net(sk), p->net)) {
		if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
			return;

		if (!peernet_has_id(sock_net(sk), p->net))
			return;

		if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
				     CAP_NET_BROADCAST))
			return;
	}

	if (p->failure) {
		netlink_overrun(sk);
		return;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
		goto out;
	}
	if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
		goto out;
	}
	if (sk_filter(sk, p->skb2)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
		goto out;
	}
	NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
	if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
		NETLINK_CB(p->skb2).nsid_is_set = true;
	val = netlink_broadcast_deliver(sk, p->skb2);
	if (val < 0) {
		netlink_overrun(sk);
		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
out:
	sock_put(sk);
}
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
	u32 group, gfp_t allocation,
	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
	void *filter_data)
{
	struct net *net = sock_net(ssk);
	struct netlink_broadcast_data info;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.portid = portid;
	info.group = group;
	info.failure = 0;
	info.delivery_failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;
	info.tx_filter = filter;
	info.tx_data = filter_data;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	consume_skb(skb);

	netlink_unlock_table();

	if (info.delivery_failure) {
		kfree_skb(info.skb2);
		return -ENOBUFS;
	}
	consume_skb(info.skb2);

	if (info.delivered) {
		if (info.congested && gfpflags_allow_blocking(allocation))
			yield();
		return 0;
	}
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
		      u32 group, gfp_t allocation)
{
	return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
					  NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
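/* Example (illustrative sketch): event producers usually skip the
 * allocation entirely when nobody is subscribed, then broadcast.
 * "my_sk", MY_GRP and build_event_skb() are hypothetical:
 *
 *	if (!netlink_has_listeners(my_sk, MY_GRP))
 *		return 0;
 *	skb = build_event_skb();
 *	if (!skb)
 *		return -ENOMEM;
 *	return netlink_broadcast(my_sk, skb, 0, MY_GRP, GFP_KERNEL);
 *
 * netlink_broadcast() returns 0 when at least one listener received
 * the message and -ESRCH when there were none.
 */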
struct netlink_set_err_data {
	struct sock *exclude_sk;
	u32 portid;
	u32 group;
	int code;
};

static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int ret = 0;

	if (sk == p->exclude_sk)
		goto out;

	if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
		goto out;

	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
		ret = 1;
		goto out;
	}

	sk->sk_err = p->code;
	sk->sk_error_report(sk);
out:
	return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct sock *sk;
	int ret = 0;

	info.exclude_sk = ssk;
	info.portid = portid;
	info.group = group;
	/* sk->sk_err wants a positive error value */
	info.code = -code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
		ret += do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
	return ret;
}
EXPORT_SYMBOL(netlink_set_err);

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
				     unsigned int group,
				     int is_new)
{
	int old, new = !!is_new, subscriptions;

	old = test_bit(group - 1, nlk->groups);
	subscriptions = nlk->subscriptions - old + new;
	if (new)
		__set_bit(group - 1, nlk->groups);
	else
		__clear_bit(group - 1, nlk->groups);
	netlink_update_subscriptions(&nlk->sk, subscriptions);
	netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int val = 0;
	int err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optlen >= sizeof(int) &&
	    get_user(val, (unsigned int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_F_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
			err = nlk->netlink_bind(sock_net(sk), val);
			if (err)
				return err;
		}
		netlink_table_grab();
		netlink_update_socket_mc(nlk, val,
					 optname == NETLINK_ADD_MEMBERSHIP);
		netlink_table_ungrab();
		if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
			nlk->netlink_unbind(sock_net(sk), val);

		err = 0;
		break;
	}
	case NETLINK_BROADCAST_ERROR:
		if (val)
			nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
		else
			nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
		err = 0;
		break;
	case NETLINK_NO_ENOBUFS:
		if (val) {
			nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
			clear_bit(NETLINK_S_CONGESTED, &nlk->state);
			wake_up_interruptible(&nlk->wait);
		} else {
			nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
		}
		err = 0;
		break;
	case NETLINK_LISTEN_ALL_NSID:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
			return -EPERM;

		if (val)
			nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
		else
			nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
		err = 0;
		break;
	case NETLINK_CAP_ACK:
		if (val)
			nlk->flags |= NETLINK_F_CAP_ACK;
		else
			nlk->flags &= ~NETLINK_F_CAP_ACK;
		err = 0;
		break;
	case NETLINK_EXT_ACK:
		if (val)
			nlk->flags |= NETLINK_F_EXT_ACK;
		else
			nlk->flags &= ~NETLINK_F_EXT_ACK;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}
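/* Example (illustrative, userspace): multicast groups past the 32 that
 * fit in sockaddr_nl.nl_groups can only be joined through the
 * NETLINK_ADD_MEMBERSHIP option handled above; the value is the
 * 1-based group number, not a bitmask:
 *
 *	unsigned int grp = 33;
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 */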
static int netlink_getsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int len, val, err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	case NETLINK_BROADCAST_ERROR:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	case NETLINK_NO_ENOBUFS:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	case NETLINK_LIST_MEMBERSHIPS: {
		int pos, idx, shift;

		err = 0;
		netlink_lock_table();
		for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
			if (len - pos < sizeof(u32))
				break;

			idx = pos / sizeof(unsigned long);
			shift = (pos % sizeof(unsigned long)) * 8;
			if (put_user((u32)(nlk->groups[idx] >> shift),
				     (u32 __user *)(optval + pos))) {
				err = -EFAULT;
				break;
			}
		}
		if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
			err = -EFAULT;
		netlink_unlock_table();
		break;
	}
	case NETLINK_CAP_ACK:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	case NETLINK_EXT_ACK:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
		if (put_user(len, optlen) || put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}
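/* Example (illustrative, userspace): NETLINK_LIST_MEMBERSHIPS above
 * copies out the whole membership bitmap; a zero-length first call
 * leaves the copy loop immediately and just reports the required
 * buffer size through optlen:
 *
 *	socklen_t len = 0;
 *	getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len);
 *	uint32_t *groups = malloc(len);
 *	getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len);
 */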
static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct nl_pktinfo info;

	info.group = NETLINK_CB(skb).dst_group;
	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
					 struct sk_buff *skb)
{
	if (!NETLINK_CB(skb).nsid_is_set)
		return;

	put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
		 &NETLINK_CB(skb).nsid);
}

static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
	u32 dst_portid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	struct scm_cookie scm;
	u32 netlink_skb_flags = 0;

	if (msg->msg_flags&MSG_OOB)
		return -EOPNOTSUPP;

	err = scm_send(sock, msg, &scm, true);
	if (err < 0)
		return err;

	if (msg->msg_namelen) {
		err = -EINVAL;
		if (addr->nl_family != AF_NETLINK)
			goto out;
		dst_portid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		err = -EPERM;
		if ((dst_group || dst_portid) &&
		    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
			goto out;
		netlink_skb_flags |= NETLINK_SKB_DST;
	} else {
		dst_portid = nlk->dst_portid;
		dst_group = nlk->dst_group;
	}

	if (!nlk->bound) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	} else {
		/* Ensure nlk is hashed and visible. */
		smp_rmb();
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	skb = netlink_alloc_large_skb(len, dst_group);
	if (skb == NULL)
		goto out;

	NETLINK_CB(skb).portid	= nlk->portid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).creds	= scm.creds;
	NETLINK_CB(skb).flags	= netlink_skb_flags;

	err = -EFAULT;
	if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
		kfree_skb(skb);
		goto out;
	}

	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	if (dst_group) {
		refcount_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
	}
	err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);

out:
	scm_destroy(&scm);
	return err;
}

static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			   int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags&MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb, *data_skb;
	int err, ret;

	if (flags&MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
	if (unlikely(skb_shinfo(skb)->frag_list)) {
		/*
		 * If this skb has a frag_list, then here that means that we
		 * will have to use the frag_list skb's data for compat tasks
		 * and the regular skb's data for normal (non-compat) tasks.
		 *
		 * If we need to send the compat skb, assign it to the
		 * 'data_skb' variable so that it will be used below for data
		 * copying. We keep 'skb' for everything else, including
		 * freeing both later.
		 */
		if (flags & MSG_CMSG_COMPAT)
			data_skb = skb_shinfo(skb)->frag_list;
	}
#endif

	/* Record the max length of recvmsg() calls for future allocations */
	nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
	nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
				     SKB_WITH_OVERHEAD(32768));

	copied = data_skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb_reset_transport_header(data_skb);
	err = skb_copy_datagram_msg(data_skb, 0, msg, copied);

	if (msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
		addr->nl_family = AF_NETLINK;
		addr->nl_pad    = 0;
		addr->nl_pid	= NETLINK_CB(skb).portid;
		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_F_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);
	if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
		netlink_cmsg_listen_all_nsid(sk, msg, skb);

	memset(&scm, 0, sizeof(scm));
	scm.creds = *NETLINK_CREDS(skb);
	if (flags & MSG_TRUNC)
		copied = data_skb->len;

	skb_free_datagram(sk, skb);

	if (nlk->cb_running &&
	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
		ret = netlink_dump(sk);
		if (ret) {
			sk->sk_err = -ret;
			sk->sk_error_report(sk);
		}
	}

	scm_recv(sock, msg, &scm, flags);
out:
	netlink_rcv_wake(sk);
	return err ?: copied;
}
static void netlink_data_ready(struct sock *sk)
{
	BUG();
}

/*
 * We export these functions to other modules. They provide a
 * complete set of kernel non-blocking support for message
 * queueing.
 */

struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
			struct netlink_kernel_cfg *cfg)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	struct listeners *listeners = NULL;
	struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
	unsigned int groups;

	BUG_ON(!nl_table);

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
		goto out_sock_release_nosk;

	sk = sock->sk;

	if (!cfg || cfg->groups < 32)
		groups = 32;
	else
		groups = cfg->groups;

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk->sk_data_ready = netlink_data_ready;
	if (cfg && cfg->input)
		nlk_sk(sk)->netlink_rcv = cfg->input;

	if (netlink_insert(sk, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_F_KERNEL_SOCKET;

	netlink_table_grab();
	if (!nl_table[unit].registered) {
		nl_table[unit].groups = groups;
		rcu_assign_pointer(nl_table[unit].listeners, listeners);
		nl_table[unit].cb_mutex = cb_mutex;
		nl_table[unit].module = module;
		if (cfg) {
			nl_table[unit].bind = cfg->bind;
			nl_table[unit].unbind = cfg->unbind;
			nl_table[unit].flags = cfg->flags;
			if (cfg->compare)
				nl_table[unit].compare = cfg->compare;
		}
		nl_table[unit].registered = 1;
	} else {
		kfree(listeners);
		nl_table[unit].registered++;
	}
	netlink_table_ungrab();
	return sk;

out_sock_release:
	kfree(listeners);
	netlink_kernel_release(sk);
	return NULL;

out_sock_release_nosk:
	sock_release(sock);
	return NULL;
}
EXPORT_SYMBOL(__netlink_kernel_create);

void
netlink_kernel_release(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);
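/* Example (illustrative sketch): families normally go through the
 * netlink_kernel_create() wrapper from <linux/netlink.h>, wiring an
 * input callback that feeds netlink_rcv_skb() below. "my_doit" is a
 * hypothetical request handler:
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_doit);
 *	}
 *
 *	struct netlink_kernel_cfg cfg = {
 *		.input	= my_input,
 *		.groups	= MY_GRP_MAX,
 *	};
 *	struct sock *sk = netlink_kernel_create(net, NETLINK_MYPROTO, &cfg);
 *	...
 *	netlink_kernel_release(sk);
 */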
int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	struct listeners *new, *old;
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];

	if (groups < 32)
		groups = 32;

	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
		new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
		if (!new)
			return -ENOMEM;
		old = nl_deref_protected(tbl->listeners);
		memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
		rcu_assign_pointer(tbl->listeners, new);

		kfree_rcu(old, rcu);
	}
	tbl->groups = groups;

	return 0;
}

/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	int err;

	netlink_table_grab();
	err = __netlink_change_ngroups(sk, groups);
	netlink_table_ungrab();

	return err;
}

void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	struct sock *sk;
	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

	sk_for_each_bound(sk, &tbl->mc_list)
		netlink_update_socket_mc(nlk_sk(sk), group, 0);
}

struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
{
	struct nlmsghdr *nlh;
	int size = nlmsg_msg_size(len);

	nlh = skb_put(skb, NLMSG_ALIGN(size));
	nlh->nlmsg_type = type;
	nlh->nlmsg_len = size;
	nlh->nlmsg_flags = flags;
	nlh->nlmsg_pid = portid;
	nlh->nlmsg_seq = seq;
	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
		memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
	return nlh;
}
EXPORT_SYMBOL(__nlmsg_put);

/*
 * It looks a bit ugly.
 * It would be better to create a kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	struct module *module;
	int err = -ENOBUFS;
	int alloc_min_size;
	int alloc_size;

	mutex_lock(nlk->cb_mutex);
	if (!nlk->cb_running) {
		err = -EINVAL;
		goto errout_skb;
	}

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto errout_skb;

	/* NLMSG_GOODSIZE is small to avoid high order allocations being
	 * required, but it makes sense to _attempt_ a 16K bytes allocation
	 * to reduce number of system calls on dump operations, if user
	 * ever provided a big enough buffer.
	 */
	cb = &nlk->cb;
	alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

	if (alloc_min_size < nlk->max_recvmsg_len) {
		alloc_size = nlk->max_recvmsg_len;
		skb = alloc_skb(alloc_size,
				(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
				__GFP_NOWARN | __GFP_NORETRY);
	}
	if (!skb) {
		alloc_size = alloc_min_size;
		skb = alloc_skb(alloc_size, GFP_KERNEL);
	}
	if (!skb)
		goto errout_skb;

	/* Trim skb to allocated size. User is expected to provide buffer as
	 * large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped at
	 * netlink_recvmsg())). dump will pack as many smaller messages as
	 * could fit within the allocated skb. skb is typically allocated
	 * with larger space than required (could be as much as near 2x the
	 * requested size with align to next power of 2 approach). Allowing
	 * dump to use the excess space makes it difficult for a user to have a
	 * reasonable static buffer based on the expected largest dump of a
	 * single netdev. The outcome is MSG_TRUNC error.
	 */
	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
	netlink_skb_set_owner_r(skb, sk);

	if (nlk->dump_done_errno > 0)
		nlk->dump_done_errno = cb->dump(skb, cb);

	if (nlk->dump_done_errno > 0 ||
	    skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
		mutex_unlock(nlk->cb_mutex);

		if (sk_filter(sk, skb))
			kfree_skb(skb);
		else
			__netlink_sendskb(sk, skb);
		return 0;
	}

	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
			       sizeof(nlk->dump_done_errno), NLM_F_MULTI);
	if (WARN_ON(!nlh))
		goto errout_skb;

	nl_dump_check_consistent(cb, nlh);

	memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
	       sizeof(nlk->dump_done_errno));

	if (sk_filter(sk, skb))
		kfree_skb(skb);
	else
		__netlink_sendskb(sk, skb);

	if (cb->done)
		cb->done(cb);

	nlk->cb_running = false;
	module = cb->module;
	skb = cb->skb;
	mutex_unlock(nlk->cb_mutex);
	module_put(module);
	consume_skb(skb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
	return err;
}
2184 */ 2185 skb_reserve(skb, skb_tailroom(skb) - alloc_size); 2186 netlink_skb_set_owner_r(skb, sk); 2187 2188 if (nlk->dump_done_errno > 0) 2189 nlk->dump_done_errno = cb->dump(skb, cb); 2190 2191 if (nlk->dump_done_errno > 0 || 2192 skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { 2193 mutex_unlock(nlk->cb_mutex); 2194 2195 if (sk_filter(sk, skb)) 2196 kfree_skb(skb); 2197 else 2198 __netlink_sendskb(sk, skb); 2199 return 0; 2200 } 2201 2202 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, 2203 sizeof(nlk->dump_done_errno), NLM_F_MULTI); 2204 if (WARN_ON(!nlh)) 2205 goto errout_skb; 2206 2207 nl_dump_check_consistent(cb, nlh); 2208 2209 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, 2210 sizeof(nlk->dump_done_errno)); 2211 2212 if (sk_filter(sk, skb)) 2213 kfree_skb(skb); 2214 else 2215 __netlink_sendskb(sk, skb); 2216 2217 if (cb->done) 2218 cb->done(cb); 2219 2220 nlk->cb_running = false; 2221 module = cb->module; 2222 skb = cb->skb; 2223 mutex_unlock(nlk->cb_mutex); 2224 module_put(module); 2225 consume_skb(skb); 2226 return 0; 2227 2228 errout_skb: 2229 mutex_unlock(nlk->cb_mutex); 2230 kfree_skb(skb); 2231 return err; 2232 } 2233 2234 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 2235 const struct nlmsghdr *nlh, 2236 struct netlink_dump_control *control) 2237 { 2238 struct netlink_callback *cb; 2239 struct sock *sk; 2240 struct netlink_sock *nlk; 2241 int ret; 2242 2243 refcount_inc(&skb->users); 2244 2245 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2246 if (sk == NULL) { 2247 ret = -ECONNREFUSED; 2248 goto error_free; 2249 } 2250 2251 nlk = nlk_sk(sk); 2252 mutex_lock(nlk->cb_mutex); 2253 /* A dump is in progress... */ 2254 if (nlk->cb_running) { 2255 ret = -EBUSY; 2256 goto error_unlock; 2257 } 2258 /* add reference of module which cb->dump belongs to */ 2259 if (!try_module_get(control->module)) { 2260 ret = -EPROTONOSUPPORT; 2261 goto error_unlock; 2262 } 2263 2264 cb = &nlk->cb; 2265 memset(cb, 0, sizeof(*cb)); 2266 cb->start = control->start; 2267 cb->dump = control->dump; 2268 cb->done = control->done; 2269 cb->nlh = nlh; 2270 cb->data = control->data; 2271 cb->module = control->module; 2272 cb->min_dump_alloc = control->min_dump_alloc; 2273 cb->skb = skb; 2274 2275 if (cb->start) { 2276 ret = cb->start(cb); 2277 if (ret) 2278 goto error_unlock; 2279 } 2280 2281 nlk->cb_running = true; 2282 nlk->dump_done_errno = INT_MAX; 2283 2284 mutex_unlock(nlk->cb_mutex); 2285 2286 ret = netlink_dump(sk); 2287 2288 sock_put(sk); 2289 2290 if (ret) 2291 return ret; 2292 2293 /* We successfully started a dump, by returning -EINTR we 2294 * signal not to send ACK even if it was requested. 2295 */ 2296 return -EINTR; 2297 2298 error_unlock: 2299 sock_put(sk); 2300 mutex_unlock(nlk->cb_mutex); 2301 error_free: 2302 kfree_skb(skb); 2303 return ret; 2304 } 2305 EXPORT_SYMBOL(__netlink_dump_start); 2306 2307 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, 2308 const struct netlink_ext_ack *extack) 2309 { 2310 struct sk_buff *skb; 2311 struct nlmsghdr *rep; 2312 struct nlmsgerr *errmsg; 2313 size_t payload = sizeof(*errmsg); 2314 size_t tlvlen = 0; 2315 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); 2316 unsigned int flags = 0; 2317 bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK; 2318 2319 /* Error messages get the original request appened, unless the user 2320 * requests to cap the error message, and get extra error data if 2321 * requested. 
2322 */ 2323 if (nlk_has_extack && extack && extack->_msg) 2324 tlvlen += nla_total_size(strlen(extack->_msg) + 1); 2325 2326 if (err) { 2327 if (!(nlk->flags & NETLINK_F_CAP_ACK)) 2328 payload += nlmsg_len(nlh); 2329 else 2330 flags |= NLM_F_CAPPED; 2331 if (nlk_has_extack && extack && extack->bad_attr) 2332 tlvlen += nla_total_size(sizeof(u32)); 2333 } else { 2334 flags |= NLM_F_CAPPED; 2335 2336 if (nlk_has_extack && extack && extack->cookie_len) 2337 tlvlen += nla_total_size(extack->cookie_len); 2338 } 2339 2340 if (tlvlen) 2341 flags |= NLM_F_ACK_TLVS; 2342 2343 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); 2344 if (!skb) { 2345 NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; 2346 NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk); 2347 return; 2348 } 2349 2350 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 2351 NLMSG_ERROR, payload, flags); 2352 errmsg = nlmsg_data(rep); 2353 errmsg->error = err; 2354 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); 2355 2356 if (nlk_has_extack && extack) { 2357 if (extack->_msg) { 2358 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, 2359 extack->_msg)); 2360 } 2361 if (err) { 2362 if (extack->bad_attr && 2363 !WARN_ON((u8 *)extack->bad_attr < in_skb->data || 2364 (u8 *)extack->bad_attr >= in_skb->data + 2365 in_skb->len)) 2366 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, 2367 (u8 *)extack->bad_attr - 2368 in_skb->data)); 2369 } else { 2370 if (extack->cookie_len) 2371 WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, 2372 extack->cookie_len, 2373 extack->cookie)); 2374 } 2375 } 2376 2377 nlmsg_end(skb, rep); 2378 2379 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); 2380 } 2381 EXPORT_SYMBOL(netlink_ack); 2382 2383 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 2384 struct nlmsghdr *, 2385 struct netlink_ext_ack *)) 2386 { 2387 struct netlink_ext_ack extack = {}; 2388 struct nlmsghdr *nlh; 2389 int err; 2390 2391 while (skb->len >= nlmsg_total_size(0)) { 2392 int msglen; 2393 2394 nlh = nlmsg_hdr(skb); 2395 err = 0; 2396 2397 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 2398 return 0; 2399 2400 /* Only requests are handled by the kernel */ 2401 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 2402 goto ack; 2403 2404 /* Skip control messages */ 2405 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 2406 goto ack; 2407 2408 err = cb(skb, nlh, &extack); 2409 if (err == -EINTR) 2410 goto skip; 2411 2412 ack: 2413 if (nlh->nlmsg_flags & NLM_F_ACK || err) 2414 netlink_ack(skb, nlh, err, &extack); 2415 2416 skip: 2417 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 2418 if (msglen > skb->len) 2419 msglen = skb->len; 2420 skb_pull(skb, msglen); 2421 } 2422 2423 return 0; 2424 } 2425 EXPORT_SYMBOL(netlink_rcv_skb); 2426 2427 /** 2428 * nlmsg_notify - send a notification netlink message 2429 * @sk: netlink socket to use 2430 * @skb: notification message 2431 * @portid: destination netlink portid for reports or 0 2432 * @group: destination multicast group or 0 2433 * @report: 1 to report back, 0 to disable 2434 * @flags: allocation flags 2435 */ 2436 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, 2437 unsigned int group, int report, gfp_t flags) 2438 { 2439 int err = 0; 2440 2441 if (group) { 2442 int exclude_portid = 0; 2443 2444 if (report) { 2445 refcount_inc(&skb->users); 2446 exclude_portid = portid; 2447 } 2448 2449 /* errors reported via destination sk->sk_err, but propagate 2450 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ 2451 
/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @portid: destination netlink portid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_portid = 0;

		if (report) {
			refcount_inc(&skb->users);
			exclude_portid = portid;
		}

		/* errors reported via destination sk->sk_err, but propagate
		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
		err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
	}

	if (report) {
		int err2;

		err2 = nlmsg_unicast(sk, skb, portid);
		if (!err || err == -ESRCH)
			err = err2;
	}

	return err;
}
EXPORT_SYMBOL(nlmsg_notify);
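/*
 * Example (illustrative sketch): the typical notification pattern, as in
 * rtnetlink's rtnl_notify(), unicasts back to the requester when
 * NLM_F_ECHO was set and multicasts to subscribed listeners.  Here skb
 * holds the prepared notification; req_skb/nlh refer to the triggering
 * request, and MY_GRP_EVENTS and my_nl_sock are placeholders:
 *
 *	int report = nlh->nlmsg_flags & NLM_F_ECHO;
 *	int err;
 *
 *	err = nlmsg_notify(my_nl_sock, skb, NETLINK_CB(req_skb).portid,
 *			   MY_GRP_EVENTS, report, GFP_KERNEL);
 */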
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
	struct seq_net_private p;
	struct rhashtable_iter hti;
	int link;
};

static int netlink_walk_start(struct nl_seq_iter *iter)
{
	int err;

	err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti,
				   GFP_KERNEL);
	if (err) {
		iter->link = MAX_LINKS;
		return err;
	}

	err = rhashtable_walk_start(&iter->hti);
	return err == -EAGAIN ? 0 : err;
}

static void netlink_walk_stop(struct nl_seq_iter *iter)
{
	rhashtable_walk_stop(&iter->hti);
	rhashtable_walk_exit(&iter->hti);
}

static void *__netlink_seq_next(struct seq_file *seq)
{
	struct nl_seq_iter *iter = seq->private;
	struct netlink_sock *nlk;

	do {
		for (;;) {
			int err;

			nlk = rhashtable_walk_next(&iter->hti);

			if (IS_ERR(nlk)) {
				if (PTR_ERR(nlk) == -EAGAIN)
					continue;

				return nlk;
			}

			if (nlk)
				break;

			netlink_walk_stop(iter);
			if (++iter->link >= MAX_LINKS)
				return NULL;

			err = netlink_walk_start(iter);
			if (err)
				return ERR_PTR(err);
		}
	} while (sock_net(&nlk->sk) != seq_file_net(seq));

	return nlk;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
{
	struct nl_seq_iter *iter = seq->private;
	void *obj = SEQ_START_TOKEN;
	loff_t pos;
	int err;

	iter->link = 0;

	err = netlink_walk_start(iter);
	if (err)
		return ERR_PTR(err);

	for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
		obj = __netlink_seq_next(seq);

	return obj;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return __netlink_seq_next(seq);
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
{
	struct nl_seq_iter *iter = seq->private;

	if (iter->link >= MAX_LINKS)
		return;

	netlink_walk_stop(iter);
}

static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "sk       Eth Pid    Groups   "
			 "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
	} else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
			   s,
			   s->sk_protocol,
			   nlk->portid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   sk_rmem_alloc_get(s),
			   sk_wmem_alloc_get(s),
			   nlk->cb_running,
			   refcount_read(&s->sk_refcnt),
			   atomic_read(&s->sk_drops),
			   sock_i_ino(s)
			);

	}
	return 0;
}

static const struct seq_operations netlink_seq_ops = {
	.start = netlink_seq_start,
	.next = netlink_seq_next,
	.stop = netlink_seq_stop,
	.show = netlink_seq_show,
};

static int netlink_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &netlink_seq_ops,
			    sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
	.owner = THIS_MODULE,
	.open = netlink_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
	.family = PF_NETLINK,
	.owner = THIS_MODULE,
	.release = netlink_release,
	.bind = netlink_bind,
	.connect = netlink_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = netlink_getname,
	.poll = datagram_poll,
	.ioctl = netlink_ioctl,
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.setsockopt = netlink_setsockopt,
	.getsockopt = netlink_getsockopt,
	.sendmsg = netlink_sendmsg,
	.recvmsg = netlink_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
};

static const struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner = THIS_MODULE,	/* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
		return -ENOMEM;
#endif
	return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("netlink", net->proc_net);
#endif
}

static void __init netlink_add_usersock_entry(void)
{
	struct listeners *listeners;
	int groups = 32;

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		panic("netlink_add_usersock_entry: Cannot allocate listeners\n");

	netlink_table_grab();

	nl_table[NETLINK_USERSOCK].groups = groups;
	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
	nl_table[NETLINK_USERSOCK].registered = 1;
	nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;

	netlink_table_ungrab();
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};
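/*
 * Example (illustrative sketch): the notifier hooks above let a subsystem
 * learn when a userspace netlink socket goes away (NETLINK_URELEASE) so it
 * can drop per-portid state.  NETLINK_MYPROTO and my_cleanup() are
 * placeholders:
 *
 *	static int my_nl_event(struct notifier_block *nb,
 *			       unsigned long action, void *ptr)
 *	{
 *		struct netlink_notify *n = ptr;
 *
 *		if (action == NETLINK_URELEASE &&
 *		    n->protocol == NETLINK_MYPROTO)
 *			my_cleanup(n->net, n->portid);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nl_notifier = {
 *		.notifier_call = my_nl_event,
 *	};
 *
 *	...
 *	netlink_register_notifier(&my_nl_notifier);
 */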
static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
{
	const struct netlink_sock *nlk = data;
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
	return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
}

static const struct rhashtable_params netlink_rhashtable_params = {
	.head_offset = offsetof(struct netlink_sock, node),
	.key_len = netlink_compare_arg_len,
	.obj_hashfn = netlink_hash,
	.obj_cmpfn = netlink_compare,
	.automatic_shrinking = true,
};

static int __init netlink_proto_init(void)
{
	int i;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	for (i = 0; i < MAX_LINKS; i++) {
		if (rhashtable_init(&nl_table[i].hash,
				    &netlink_rhashtable_params) < 0) {
			while (--i >= 0)	/* >= 0: tear down table 0 too */
				rhashtable_destroy(&nl_table[i].hash);
			kfree(nl_table);
			goto panic;
		}
	}

	INIT_LIST_HEAD(&netlink_tap_all);

	netlink_add_usersock_entry();

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);
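/*
 * For reference, the userspace counterpart of the PF_NETLINK family
 * registered above (an illustrative sketch, not kernel code; error
 * handling omitted) would exercise netlink_create() and netlink_bind()
 * via the socket API:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl sa = {
 *		.nl_family = AF_NETLINK,
 *		.nl_groups = RTMGRP_LINK,
 *	};
 *
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */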