/*
 * NETLINK	Kernel-user communication protocol.
 *
 * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 * 				Patrick McHardy <kaber@trash.net>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *				added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 * 				use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 * 				- inc module use count of module that owns
 * 				  the kernel socket in case userspace opens
 * 				  socket of same protocol
 * 				- remove all module support, since netlink is
 * 				  mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/if_arp.h>
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
#include <linux/genetlink.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#include "af_netlink.h"

struct listeners {
	struct rcu_head		rcu;
	unsigned long		masks[0];
};

/* state bits */
#define NETLINK_S_CONGESTED		0x0

/* flags */
#define NETLINK_F_KERNEL_SOCKET		0x1
#define NETLINK_F_RECV_PKTINFO		0x2
#define NETLINK_F_BROADCAST_SEND_ERROR	0x4
#define NETLINK_F_RECV_NO_ENOBUFS	0x8
#define NETLINK_F_LISTEN_ALL_NSID	0x10
#define NETLINK_F_CAP_ACK		0x20

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}

struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];

static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
	"nlk_cb_mutex-ROUTE",
	"nlk_cb_mutex-1",
	"nlk_cb_mutex-USERSOCK",
	"nlk_cb_mutex-FIREWALL",
	"nlk_cb_mutex-SOCK_DIAG",
	"nlk_cb_mutex-NFLOG",
	"nlk_cb_mutex-XFRM",
	"nlk_cb_mutex-SELINUX",
	"nlk_cb_mutex-ISCSI",
	"nlk_cb_mutex-AUDIT",
	"nlk_cb_mutex-FIB_LOOKUP",
	"nlk_cb_mutex-CONNECTOR",
	"nlk_cb_mutex-NETFILTER",
	"nlk_cb_mutex-IP6_FW",
	"nlk_cb_mutex-DNRTMSG",
	"nlk_cb_mutex-KOBJECT_UEVENT",
	"nlk_cb_mutex-GENERIC",
	"nlk_cb_mutex-17",
"nlk_cb_mutex-SCSITRANSPORT", 121 "nlk_cb_mutex-ECRYPTFS", 122 "nlk_cb_mutex-RDMA", 123 "nlk_cb_mutex-CRYPTO", 124 "nlk_cb_mutex-SMC", 125 "nlk_cb_mutex-23", 126 "nlk_cb_mutex-24", 127 "nlk_cb_mutex-25", 128 "nlk_cb_mutex-26", 129 "nlk_cb_mutex-27", 130 "nlk_cb_mutex-28", 131 "nlk_cb_mutex-29", 132 "nlk_cb_mutex-30", 133 "nlk_cb_mutex-31", 134 "nlk_cb_mutex-MAX_LINKS" 135 }; 136 137 static int netlink_dump(struct sock *sk); 138 static void netlink_skb_destructor(struct sk_buff *skb); 139 140 /* nl_table locking explained: 141 * Lookup and traversal are protected with an RCU read-side lock. Insertion 142 * and removal are protected with per bucket lock while using RCU list 143 * modification primitives and may run in parallel to RCU protected lookups. 144 * Destruction of the Netlink socket may only occur *after* nl_table_lock has 145 * been acquired * either during or after the socket has been removed from 146 * the list and after an RCU grace period. 147 */ 148 DEFINE_RWLOCK(nl_table_lock); 149 EXPORT_SYMBOL_GPL(nl_table_lock); 150 static atomic_t nl_table_users = ATOMIC_INIT(0); 151 152 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 153 154 static BLOCKING_NOTIFIER_HEAD(netlink_chain); 155 156 static DEFINE_SPINLOCK(netlink_tap_lock); 157 static struct list_head netlink_tap_all __read_mostly; 158 159 static const struct rhashtable_params netlink_rhashtable_params; 160 161 static inline u32 netlink_group_mask(u32 group) 162 { 163 return group ? 1 << (group - 1) : 0; 164 } 165 166 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, 167 gfp_t gfp_mask) 168 { 169 unsigned int len = skb_end_offset(skb); 170 struct sk_buff *new; 171 172 new = alloc_skb(len, gfp_mask); 173 if (new == NULL) 174 return NULL; 175 176 NETLINK_CB(new).portid = NETLINK_CB(skb).portid; 177 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; 178 NETLINK_CB(new).creds = NETLINK_CB(skb).creds; 179 180 memcpy(skb_put(new, len), skb->data, len); 181 return new; 182 } 183 184 int netlink_add_tap(struct netlink_tap *nt) 185 { 186 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 187 return -EINVAL; 188 189 spin_lock(&netlink_tap_lock); 190 list_add_rcu(&nt->list, &netlink_tap_all); 191 spin_unlock(&netlink_tap_lock); 192 193 __module_get(nt->module); 194 195 return 0; 196 } 197 EXPORT_SYMBOL_GPL(netlink_add_tap); 198 199 static int __netlink_remove_tap(struct netlink_tap *nt) 200 { 201 bool found = false; 202 struct netlink_tap *tmp; 203 204 spin_lock(&netlink_tap_lock); 205 206 list_for_each_entry(tmp, &netlink_tap_all, list) { 207 if (nt == tmp) { 208 list_del_rcu(&nt->list); 209 found = true; 210 goto out; 211 } 212 } 213 214 pr_warn("__netlink_remove_tap: %p not found\n", nt); 215 out: 216 spin_unlock(&netlink_tap_lock); 217 218 if (found) 219 module_put(nt->module); 220 221 return found ? 0 : -ENODEV; 222 } 223 224 int netlink_remove_tap(struct netlink_tap *nt) 225 { 226 int ret; 227 228 ret = __netlink_remove_tap(nt); 229 synchronize_net(); 230 231 return ret; 232 } 233 EXPORT_SYMBOL_GPL(netlink_remove_tap); 234 235 static bool netlink_filter_tap(const struct sk_buff *skb) 236 { 237 struct sock *sk = skb->sk; 238 239 /* We take the more conservative approach and 240 * whitelist socket protocols that may pass. 
241 */ 242 switch (sk->sk_protocol) { 243 case NETLINK_ROUTE: 244 case NETLINK_USERSOCK: 245 case NETLINK_SOCK_DIAG: 246 case NETLINK_NFLOG: 247 case NETLINK_XFRM: 248 case NETLINK_FIB_LOOKUP: 249 case NETLINK_NETFILTER: 250 case NETLINK_GENERIC: 251 return true; 252 } 253 254 return false; 255 } 256 257 static int __netlink_deliver_tap_skb(struct sk_buff *skb, 258 struct net_device *dev) 259 { 260 struct sk_buff *nskb; 261 struct sock *sk = skb->sk; 262 int ret = -ENOMEM; 263 264 dev_hold(dev); 265 266 if (is_vmalloc_addr(skb->head)) 267 nskb = netlink_to_full_skb(skb, GFP_ATOMIC); 268 else 269 nskb = skb_clone(skb, GFP_ATOMIC); 270 if (nskb) { 271 nskb->dev = dev; 272 nskb->protocol = htons((u16) sk->sk_protocol); 273 nskb->pkt_type = netlink_is_kernel(sk) ? 274 PACKET_KERNEL : PACKET_USER; 275 skb_reset_network_header(nskb); 276 ret = dev_queue_xmit(nskb); 277 if (unlikely(ret > 0)) 278 ret = net_xmit_errno(ret); 279 } 280 281 dev_put(dev); 282 return ret; 283 } 284 285 static void __netlink_deliver_tap(struct sk_buff *skb) 286 { 287 int ret; 288 struct netlink_tap *tmp; 289 290 if (!netlink_filter_tap(skb)) 291 return; 292 293 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { 294 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 295 if (unlikely(ret)) 296 break; 297 } 298 } 299 300 static void netlink_deliver_tap(struct sk_buff *skb) 301 { 302 rcu_read_lock(); 303 304 if (unlikely(!list_empty(&netlink_tap_all))) 305 __netlink_deliver_tap(skb); 306 307 rcu_read_unlock(); 308 } 309 310 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, 311 struct sk_buff *skb) 312 { 313 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) 314 netlink_deliver_tap(skb); 315 } 316 317 static void netlink_overrun(struct sock *sk) 318 { 319 struct netlink_sock *nlk = nlk_sk(sk); 320 321 if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { 322 if (!test_and_set_bit(NETLINK_S_CONGESTED, 323 &nlk_sk(sk)->state)) { 324 sk->sk_err = ENOBUFS; 325 sk->sk_error_report(sk); 326 } 327 } 328 atomic_inc(&sk->sk_drops); 329 } 330 331 static void netlink_rcv_wake(struct sock *sk) 332 { 333 struct netlink_sock *nlk = nlk_sk(sk); 334 335 if (skb_queue_empty(&sk->sk_receive_queue)) 336 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 337 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) 338 wake_up_interruptible(&nlk->wait); 339 } 340 341 static void netlink_skb_destructor(struct sk_buff *skb) 342 { 343 if (is_vmalloc_addr(skb->head)) { 344 if (!skb->cloned || 345 !atomic_dec_return(&(skb_shinfo(skb)->dataref))) 346 vfree(skb->head); 347 348 skb->head = NULL; 349 } 350 if (skb->sk != NULL) 351 sock_rfree(skb); 352 } 353 354 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 355 { 356 WARN_ON(skb->sk != NULL); 357 skb->sk = sk; 358 skb->destructor = netlink_skb_destructor; 359 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 360 sk_mem_charge(sk, skb->truesize); 361 } 362 363 static void netlink_sock_destruct(struct sock *sk) 364 { 365 struct netlink_sock *nlk = nlk_sk(sk); 366 367 if (nlk->cb_running) { 368 if (nlk->cb.done) 369 nlk->cb.done(&nlk->cb); 370 module_put(nlk->cb.module); 371 kfree_skb(nlk->cb.skb); 372 } 373 374 skb_queue_purge(&sk->sk_receive_queue); 375 376 if (!sock_flag(sk, SOCK_DEAD)) { 377 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 378 return; 379 } 380 381 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 382 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 383 WARN_ON(nlk_sk(sk)->groups); 384 } 385 386 static void netlink_sock_destruct_work(struct work_struct 
*work) 387 { 388 struct netlink_sock *nlk = container_of(work, struct netlink_sock, 389 work); 390 391 sk_free(&nlk->sk); 392 } 393 394 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 395 * SMP. Look, when several writers sleep and reader wakes them up, all but one 396 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 397 * this, _but_ remember, it adds useless work on UP machines. 398 */ 399 400 void netlink_table_grab(void) 401 __acquires(nl_table_lock) 402 { 403 might_sleep(); 404 405 write_lock_irq(&nl_table_lock); 406 407 if (atomic_read(&nl_table_users)) { 408 DECLARE_WAITQUEUE(wait, current); 409 410 add_wait_queue_exclusive(&nl_table_wait, &wait); 411 for (;;) { 412 set_current_state(TASK_UNINTERRUPTIBLE); 413 if (atomic_read(&nl_table_users) == 0) 414 break; 415 write_unlock_irq(&nl_table_lock); 416 schedule(); 417 write_lock_irq(&nl_table_lock); 418 } 419 420 __set_current_state(TASK_RUNNING); 421 remove_wait_queue(&nl_table_wait, &wait); 422 } 423 } 424 425 void netlink_table_ungrab(void) 426 __releases(nl_table_lock) 427 { 428 write_unlock_irq(&nl_table_lock); 429 wake_up(&nl_table_wait); 430 } 431 432 static inline void 433 netlink_lock_table(void) 434 { 435 /* read_lock() synchronizes us to netlink_table_grab */ 436 437 read_lock(&nl_table_lock); 438 atomic_inc(&nl_table_users); 439 read_unlock(&nl_table_lock); 440 } 441 442 static inline void 443 netlink_unlock_table(void) 444 { 445 if (atomic_dec_and_test(&nl_table_users)) 446 wake_up(&nl_table_wait); 447 } 448 449 struct netlink_compare_arg 450 { 451 possible_net_t pnet; 452 u32 portid; 453 }; 454 455 /* Doing sizeof directly may yield 4 extra bytes on 64-bit. */ 456 #define netlink_compare_arg_len \ 457 (offsetof(struct netlink_compare_arg, portid) + sizeof(u32)) 458 459 static inline int netlink_compare(struct rhashtable_compare_arg *arg, 460 const void *ptr) 461 { 462 const struct netlink_compare_arg *x = arg->key; 463 const struct netlink_sock *nlk = ptr; 464 465 return nlk->portid != x->portid || 466 !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); 467 } 468 469 static void netlink_compare_arg_init(struct netlink_compare_arg *arg, 470 struct net *net, u32 portid) 471 { 472 memset(arg, 0, sizeof(*arg)); 473 write_pnet(&arg->pnet, net); 474 arg->portid = portid; 475 } 476 477 static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, 478 struct net *net) 479 { 480 struct netlink_compare_arg arg; 481 482 netlink_compare_arg_init(&arg, net, portid); 483 return rhashtable_lookup_fast(&table->hash, &arg, 484 netlink_rhashtable_params); 485 } 486 487 static int __netlink_insert(struct netlink_table *table, struct sock *sk) 488 { 489 struct netlink_compare_arg arg; 490 491 netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); 492 return rhashtable_lookup_insert_key(&table->hash, &arg, 493 &nlk_sk(sk)->node, 494 netlink_rhashtable_params); 495 } 496 497 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 498 { 499 struct netlink_table *table = &nl_table[protocol]; 500 struct sock *sk; 501 502 rcu_read_lock(); 503 sk = __netlink_lookup(table, portid, net); 504 if (sk) 505 sock_hold(sk); 506 rcu_read_unlock(); 507 508 return sk; 509 } 510 511 static const struct proto_ops netlink_ops; 512 513 static void 514 netlink_update_listeners(struct sock *sk) 515 { 516 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 517 unsigned long mask; 518 unsigned int i; 519 struct listeners *listeners; 520 521 listeners = 
nl_deref_protected(tbl->listeners); 522 if (!listeners) 523 return; 524 525 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 526 mask = 0; 527 sk_for_each_bound(sk, &tbl->mc_list) { 528 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 529 mask |= nlk_sk(sk)->groups[i]; 530 } 531 listeners->masks[i] = mask; 532 } 533 /* this function is only called with the netlink table "grabbed", which 534 * makes sure updates are visible before bind or setsockopt return. */ 535 } 536 537 static int netlink_insert(struct sock *sk, u32 portid) 538 { 539 struct netlink_table *table = &nl_table[sk->sk_protocol]; 540 int err; 541 542 lock_sock(sk); 543 544 err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY; 545 if (nlk_sk(sk)->bound) 546 goto err; 547 548 err = -ENOMEM; 549 if (BITS_PER_LONG > 32 && 550 unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) 551 goto err; 552 553 nlk_sk(sk)->portid = portid; 554 sock_hold(sk); 555 556 err = __netlink_insert(table, sk); 557 if (err) { 558 /* In case the hashtable backend returns with -EBUSY 559 * from here, it must not escape to the caller. 560 */ 561 if (unlikely(err == -EBUSY)) 562 err = -EOVERFLOW; 563 if (err == -EEXIST) 564 err = -EADDRINUSE; 565 sock_put(sk); 566 goto err; 567 } 568 569 /* We need to ensure that the socket is hashed and visible. */ 570 smp_wmb(); 571 nlk_sk(sk)->bound = portid; 572 573 err: 574 release_sock(sk); 575 return err; 576 } 577 578 static void netlink_remove(struct sock *sk) 579 { 580 struct netlink_table *table; 581 582 table = &nl_table[sk->sk_protocol]; 583 if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, 584 netlink_rhashtable_params)) { 585 WARN_ON(atomic_read(&sk->sk_refcnt) == 1); 586 __sock_put(sk); 587 } 588 589 netlink_table_grab(); 590 if (nlk_sk(sk)->subscriptions) { 591 __sk_del_bind_node(sk); 592 netlink_update_listeners(sk); 593 } 594 if (sk->sk_protocol == NETLINK_GENERIC) 595 atomic_inc(&genl_sk_destructing_cnt); 596 netlink_table_ungrab(); 597 } 598 599 static struct proto netlink_proto = { 600 .name = "NETLINK", 601 .owner = THIS_MODULE, 602 .obj_size = sizeof(struct netlink_sock), 603 }; 604 605 static int __netlink_create(struct net *net, struct socket *sock, 606 struct mutex *cb_mutex, int protocol, 607 int kern) 608 { 609 struct sock *sk; 610 struct netlink_sock *nlk; 611 612 sock->ops = &netlink_ops; 613 614 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); 615 if (!sk) 616 return -ENOMEM; 617 618 sock_init_data(sock, sk); 619 620 nlk = nlk_sk(sk); 621 if (cb_mutex) { 622 nlk->cb_mutex = cb_mutex; 623 } else { 624 nlk->cb_mutex = &nlk->cb_def_mutex; 625 mutex_init(nlk->cb_mutex); 626 lockdep_set_class_and_name(nlk->cb_mutex, 627 nlk_cb_mutex_keys + protocol, 628 nlk_cb_mutex_key_strings[protocol]); 629 } 630 init_waitqueue_head(&nlk->wait); 631 632 sk->sk_destruct = netlink_sock_destruct; 633 sk->sk_protocol = protocol; 634 return 0; 635 } 636 637 static int netlink_create(struct net *net, struct socket *sock, int protocol, 638 int kern) 639 { 640 struct module *module = NULL; 641 struct mutex *cb_mutex; 642 struct netlink_sock *nlk; 643 int (*bind)(struct net *net, int group); 644 void (*unbind)(struct net *net, int group); 645 int err = 0; 646 647 sock->state = SS_UNCONNECTED; 648 649 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 650 return -ESOCKTNOSUPPORT; 651 652 if (protocol < 0 || protocol >= MAX_LINKS) 653 return -EPROTONOSUPPORT; 654 655 netlink_lock_table(); 656 #ifdef CONFIG_MODULES 657 if (!nl_table[protocol].registered) { 658 netlink_unlock_table(); 659 
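		/* The table lock is dropped across request_module(); the
		 * requested modprobe alias is "net-pf-16-proto-<protocol>",
		 * since PF_NETLINK is 16.
		 */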
request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 660 netlink_lock_table(); 661 } 662 #endif 663 if (nl_table[protocol].registered && 664 try_module_get(nl_table[protocol].module)) 665 module = nl_table[protocol].module; 666 else 667 err = -EPROTONOSUPPORT; 668 cb_mutex = nl_table[protocol].cb_mutex; 669 bind = nl_table[protocol].bind; 670 unbind = nl_table[protocol].unbind; 671 netlink_unlock_table(); 672 673 if (err < 0) 674 goto out; 675 676 err = __netlink_create(net, sock, cb_mutex, protocol, kern); 677 if (err < 0) 678 goto out_module; 679 680 local_bh_disable(); 681 sock_prot_inuse_add(net, &netlink_proto, 1); 682 local_bh_enable(); 683 684 nlk = nlk_sk(sock->sk); 685 nlk->module = module; 686 nlk->netlink_bind = bind; 687 nlk->netlink_unbind = unbind; 688 out: 689 return err; 690 691 out_module: 692 module_put(module); 693 goto out; 694 } 695 696 static void deferred_put_nlk_sk(struct rcu_head *head) 697 { 698 struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); 699 struct sock *sk = &nlk->sk; 700 701 if (!atomic_dec_and_test(&sk->sk_refcnt)) 702 return; 703 704 if (nlk->cb_running && nlk->cb.done) { 705 INIT_WORK(&nlk->work, netlink_sock_destruct_work); 706 schedule_work(&nlk->work); 707 return; 708 } 709 710 sk_free(sk); 711 } 712 713 static int netlink_release(struct socket *sock) 714 { 715 struct sock *sk = sock->sk; 716 struct netlink_sock *nlk; 717 718 if (!sk) 719 return 0; 720 721 netlink_remove(sk); 722 sock_orphan(sk); 723 nlk = nlk_sk(sk); 724 725 /* 726 * OK. Socket is unlinked, any packets that arrive now 727 * will be purged. 728 */ 729 730 /* must not acquire netlink_table_lock in any way again before unbind 731 * and notifying genetlink is done as otherwise it might deadlock 732 */ 733 if (nlk->netlink_unbind) { 734 int i; 735 736 for (i = 0; i < nlk->ngroups; i++) 737 if (test_bit(i, nlk->groups)) 738 nlk->netlink_unbind(sock_net(sk), i + 1); 739 } 740 if (sk->sk_protocol == NETLINK_GENERIC && 741 atomic_dec_return(&genl_sk_destructing_cnt) == 0) 742 wake_up(&genl_sk_destructing_waitq); 743 744 sock->sk = NULL; 745 wake_up_interruptible_all(&nlk->wait); 746 747 skb_queue_purge(&sk->sk_write_queue); 748 749 if (nlk->portid && nlk->bound) { 750 struct netlink_notify n = { 751 .net = sock_net(sk), 752 .protocol = sk->sk_protocol, 753 .portid = nlk->portid, 754 }; 755 blocking_notifier_call_chain(&netlink_chain, 756 NETLINK_URELEASE, &n); 757 } 758 759 module_put(nlk->module); 760 761 if (netlink_is_kernel(sk)) { 762 netlink_table_grab(); 763 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 764 if (--nl_table[sk->sk_protocol].registered == 0) { 765 struct listeners *old; 766 767 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); 768 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); 769 kfree_rcu(old, rcu); 770 nl_table[sk->sk_protocol].module = NULL; 771 nl_table[sk->sk_protocol].bind = NULL; 772 nl_table[sk->sk_protocol].unbind = NULL; 773 nl_table[sk->sk_protocol].flags = 0; 774 nl_table[sk->sk_protocol].registered = 0; 775 } 776 netlink_table_ungrab(); 777 } 778 779 kfree(nlk->groups); 780 nlk->groups = NULL; 781 782 local_bh_disable(); 783 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 784 local_bh_enable(); 785 call_rcu(&nlk->rcu, deferred_put_nlk_sk); 786 return 0; 787 } 788 789 static int netlink_autobind(struct socket *sock) 790 { 791 struct sock *sk = sock->sk; 792 struct net *net = sock_net(sk); 793 struct netlink_table *table = &nl_table[sk->sk_protocol]; 794 s32 portid = task_tgid_vnr(current); 
	int err;
	s32 rover = -4096;
	bool ok;

retry:
	cond_resched();
	rcu_read_lock();
	ok = !__netlink_lookup(table, portid, net);
	rcu_read_unlock();
	if (!ok) {
		/* Bind collision, search negative portid values. */
		if (rover == -4096)
			/* rover will be in range [S32_MIN, -4097] */
			rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
		else if (rover >= -4096)
			rover = -4097;
		portid = rover--;
		goto retry;
	}

	err = netlink_insert(sk, portid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine. */
	if (err == -EBUSY)
		err = 0;

	return err;
}

/**
 * __netlink_ns_capable - General netlink message capability test
 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap in the user namespace @user_ns when
 * the netlink socket was created and that the sender of the message
 * has it as well.
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
			  struct user_namespace *user_ns, int cap)
{
	return ((nsp->flags & NETLINK_SKB_DST) ||
		file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);

/**
 * netlink_ns_capable - General netlink message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap in the user namespace @user_ns when
 * the netlink socket was created and that the sender of the message
 * has it as well.
 */
bool netlink_ns_capable(const struct sk_buff *skb,
			struct user_namespace *user_ns, int cap)
{
	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
}
EXPORT_SYMBOL(netlink_ns_capable);

/**
 * netlink_capable - Netlink global message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and that the sender of the message has the capability @cap in all
 * user namespaces.
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created
 * and that the sender of the message has the capability @cap over
 * the network namespace of the socket we received the message from.
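 *
 * A typical call site in a message handler might look like this
 * (illustrative sketch only, not a call taken from this file):
 *
 *	if (!netlink_net_capable(skb, CAP_NET_ADMIN))
 *		return -EPERM;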
886 */ 887 bool netlink_net_capable(const struct sk_buff *skb, int cap) 888 { 889 return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); 890 } 891 EXPORT_SYMBOL(netlink_net_capable); 892 893 static inline int netlink_allowed(const struct socket *sock, unsigned int flag) 894 { 895 return (nl_table[sock->sk->sk_protocol].flags & flag) || 896 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); 897 } 898 899 static void 900 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 901 { 902 struct netlink_sock *nlk = nlk_sk(sk); 903 904 if (nlk->subscriptions && !subscriptions) 905 __sk_del_bind_node(sk); 906 else if (!nlk->subscriptions && subscriptions) 907 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 908 nlk->subscriptions = subscriptions; 909 } 910 911 static int netlink_realloc_groups(struct sock *sk) 912 { 913 struct netlink_sock *nlk = nlk_sk(sk); 914 unsigned int groups; 915 unsigned long *new_groups; 916 int err = 0; 917 918 netlink_table_grab(); 919 920 groups = nl_table[sk->sk_protocol].groups; 921 if (!nl_table[sk->sk_protocol].registered) { 922 err = -ENOENT; 923 goto out_unlock; 924 } 925 926 if (nlk->ngroups >= groups) 927 goto out_unlock; 928 929 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 930 if (new_groups == NULL) { 931 err = -ENOMEM; 932 goto out_unlock; 933 } 934 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 935 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 936 937 nlk->groups = new_groups; 938 nlk->ngroups = groups; 939 out_unlock: 940 netlink_table_ungrab(); 941 return err; 942 } 943 944 static void netlink_undo_bind(int group, long unsigned int groups, 945 struct sock *sk) 946 { 947 struct netlink_sock *nlk = nlk_sk(sk); 948 int undo; 949 950 if (!nlk->netlink_unbind) 951 return; 952 953 for (undo = 0; undo < group; undo++) 954 if (test_bit(undo, &groups)) 955 nlk->netlink_unbind(sock_net(sk), undo + 1); 956 } 957 958 static int netlink_bind(struct socket *sock, struct sockaddr *addr, 959 int addr_len) 960 { 961 struct sock *sk = sock->sk; 962 struct net *net = sock_net(sk); 963 struct netlink_sock *nlk = nlk_sk(sk); 964 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 965 int err; 966 long unsigned int groups = nladdr->nl_groups; 967 bool bound; 968 969 if (addr_len < sizeof(struct sockaddr_nl)) 970 return -EINVAL; 971 972 if (nladdr->nl_family != AF_NETLINK) 973 return -EINVAL; 974 975 /* Only superuser is allowed to listen multicasts */ 976 if (groups) { 977 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 978 return -EPERM; 979 err = netlink_realloc_groups(sk); 980 if (err) 981 return err; 982 } 983 984 bound = nlk->bound; 985 if (bound) { 986 /* Ensure nlk->portid is up-to-date. */ 987 smp_rmb(); 988 989 if (nladdr->nl_pid != nlk->portid) 990 return -EINVAL; 991 } 992 993 if (nlk->netlink_bind && groups) { 994 int group; 995 996 for (group = 0; group < nlk->ngroups; group++) { 997 if (!test_bit(group, &groups)) 998 continue; 999 err = nlk->netlink_bind(net, group + 1); 1000 if (!err) 1001 continue; 1002 netlink_undo_bind(group, groups, sk); 1003 return err; 1004 } 1005 } 1006 1007 /* No need for barriers here as we return to user-space without 1008 * using any of the bound attributes. 1009 */ 1010 if (!bound) { 1011 err = nladdr->nl_pid ? 
1012 netlink_insert(sk, nladdr->nl_pid) : 1013 netlink_autobind(sock); 1014 if (err) { 1015 netlink_undo_bind(nlk->ngroups, groups, sk); 1016 return err; 1017 } 1018 } 1019 1020 if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) 1021 return 0; 1022 1023 netlink_table_grab(); 1024 netlink_update_subscriptions(sk, nlk->subscriptions + 1025 hweight32(groups) - 1026 hweight32(nlk->groups[0])); 1027 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; 1028 netlink_update_listeners(sk); 1029 netlink_table_ungrab(); 1030 1031 return 0; 1032 } 1033 1034 static int netlink_connect(struct socket *sock, struct sockaddr *addr, 1035 int alen, int flags) 1036 { 1037 int err = 0; 1038 struct sock *sk = sock->sk; 1039 struct netlink_sock *nlk = nlk_sk(sk); 1040 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 1041 1042 if (alen < sizeof(addr->sa_family)) 1043 return -EINVAL; 1044 1045 if (addr->sa_family == AF_UNSPEC) { 1046 sk->sk_state = NETLINK_UNCONNECTED; 1047 nlk->dst_portid = 0; 1048 nlk->dst_group = 0; 1049 return 0; 1050 } 1051 if (addr->sa_family != AF_NETLINK) 1052 return -EINVAL; 1053 1054 if ((nladdr->nl_groups || nladdr->nl_pid) && 1055 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1056 return -EPERM; 1057 1058 /* No need for barriers here as we return to user-space without 1059 * using any of the bound attributes. 1060 */ 1061 if (!nlk->bound) 1062 err = netlink_autobind(sock); 1063 1064 if (err == 0) { 1065 sk->sk_state = NETLINK_CONNECTED; 1066 nlk->dst_portid = nladdr->nl_pid; 1067 nlk->dst_group = ffs(nladdr->nl_groups); 1068 } 1069 1070 return err; 1071 } 1072 1073 static int netlink_getname(struct socket *sock, struct sockaddr *addr, 1074 int *addr_len, int peer) 1075 { 1076 struct sock *sk = sock->sk; 1077 struct netlink_sock *nlk = nlk_sk(sk); 1078 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); 1079 1080 nladdr->nl_family = AF_NETLINK; 1081 nladdr->nl_pad = 0; 1082 *addr_len = sizeof(*nladdr); 1083 1084 if (peer) { 1085 nladdr->nl_pid = nlk->dst_portid; 1086 nladdr->nl_groups = netlink_group_mask(nlk->dst_group); 1087 } else { 1088 nladdr->nl_pid = nlk->portid; 1089 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; 1090 } 1091 return 0; 1092 } 1093 1094 static int netlink_ioctl(struct socket *sock, unsigned int cmd, 1095 unsigned long arg) 1096 { 1097 /* try to hand this ioctl down to the NIC drivers. 
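	 * Returning -ENOIOCTLCMD lets the socket layer fall back to the
	 * generic device ioctl path.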
	 */
	return -ENOIOCTLCMD;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_portid != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
					       int broadcast)
{
	struct sk_buff *skb;
	void *data;

	if (size <= NLMSG_GOODSIZE || broadcast)
		return alloc_skb(size, GFP_KERNEL);

	size = SKB_DATA_ALIGN(size) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	data = vmalloc(size);
	if (data == NULL)
		return NULL;

	skb = __build_skb(data, size);
	if (skb == NULL)
		vfree(data);
	else
		skb->destructor = netlink_skb_destructor;

	return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
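 *    A return of 1 means the caller should redo the socket lookup and call
 *    netlink_attachskb() again, as netlink_unicast() below does.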
1171 */ 1172 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 1173 long *timeo, struct sock *ssk) 1174 { 1175 struct netlink_sock *nlk; 1176 1177 nlk = nlk_sk(sk); 1178 1179 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1180 test_bit(NETLINK_S_CONGESTED, &nlk->state))) { 1181 DECLARE_WAITQUEUE(wait, current); 1182 if (!*timeo) { 1183 if (!ssk || netlink_is_kernel(ssk)) 1184 netlink_overrun(sk); 1185 sock_put(sk); 1186 kfree_skb(skb); 1187 return -EAGAIN; 1188 } 1189 1190 __set_current_state(TASK_INTERRUPTIBLE); 1191 add_wait_queue(&nlk->wait, &wait); 1192 1193 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1194 test_bit(NETLINK_S_CONGESTED, &nlk->state)) && 1195 !sock_flag(sk, SOCK_DEAD)) 1196 *timeo = schedule_timeout(*timeo); 1197 1198 __set_current_state(TASK_RUNNING); 1199 remove_wait_queue(&nlk->wait, &wait); 1200 sock_put(sk); 1201 1202 if (signal_pending(current)) { 1203 kfree_skb(skb); 1204 return sock_intr_errno(*timeo); 1205 } 1206 return 1; 1207 } 1208 netlink_skb_set_owner_r(skb, sk); 1209 return 0; 1210 } 1211 1212 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1213 { 1214 int len = skb->len; 1215 1216 netlink_deliver_tap(skb); 1217 1218 skb_queue_tail(&sk->sk_receive_queue, skb); 1219 sk->sk_data_ready(sk); 1220 return len; 1221 } 1222 1223 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1224 { 1225 int len = __netlink_sendskb(sk, skb); 1226 1227 sock_put(sk); 1228 return len; 1229 } 1230 1231 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 1232 { 1233 kfree_skb(skb); 1234 sock_put(sk); 1235 } 1236 1237 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) 1238 { 1239 int delta; 1240 1241 WARN_ON(skb->sk != NULL); 1242 delta = skb->end - skb->tail; 1243 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) 1244 return skb; 1245 1246 if (skb_shared(skb)) { 1247 struct sk_buff *nskb = skb_clone(skb, allocation); 1248 if (!nskb) 1249 return skb; 1250 consume_skb(skb); 1251 skb = nskb; 1252 } 1253 1254 pskb_expand_head(skb, 0, -delta, 1255 (allocation & ~__GFP_DIRECT_RECLAIM) | 1256 __GFP_NOWARN | __GFP_NORETRY); 1257 return skb; 1258 } 1259 1260 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 1261 struct sock *ssk) 1262 { 1263 int ret; 1264 struct netlink_sock *nlk = nlk_sk(sk); 1265 1266 ret = -ECONNREFUSED; 1267 if (nlk->netlink_rcv != NULL) { 1268 ret = skb->len; 1269 netlink_skb_set_owner_r(skb, sk); 1270 NETLINK_CB(skb).sk = ssk; 1271 netlink_deliver_tap_kernel(sk, ssk, skb); 1272 nlk->netlink_rcv(skb); 1273 consume_skb(skb); 1274 } else { 1275 kfree_skb(skb); 1276 } 1277 sock_put(sk); 1278 return ret; 1279 } 1280 1281 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 1282 u32 portid, int nonblock) 1283 { 1284 struct sock *sk; 1285 int err; 1286 long timeo; 1287 1288 skb = netlink_trim(skb, gfp_any()); 1289 1290 timeo = sock_sndtimeo(ssk, nonblock); 1291 retry: 1292 sk = netlink_getsockbyportid(ssk, portid); 1293 if (IS_ERR(sk)) { 1294 kfree_skb(skb); 1295 return PTR_ERR(sk); 1296 } 1297 if (netlink_is_kernel(sk)) 1298 return netlink_unicast_kernel(sk, skb, ssk); 1299 1300 if (sk_filter(sk, skb)) { 1301 err = skb->len; 1302 kfree_skb(skb); 1303 sock_put(sk); 1304 return err; 1305 } 1306 1307 err = netlink_attachskb(sk, skb, &timeo, ssk); 1308 if (err == 1) 1309 goto retry; 1310 if (err) 1311 return err; 1312 1313 return netlink_sendskb(sk, skb); 1314 } 1315 EXPORT_SYMBOL(netlink_unicast); 1316 1317 int netlink_has_listeners(struct sock *sk, 
unsigned int group) 1318 { 1319 int res = 0; 1320 struct listeners *listeners; 1321 1322 BUG_ON(!netlink_is_kernel(sk)); 1323 1324 rcu_read_lock(); 1325 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 1326 1327 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) 1328 res = test_bit(group - 1, listeners->masks); 1329 1330 rcu_read_unlock(); 1331 1332 return res; 1333 } 1334 EXPORT_SYMBOL_GPL(netlink_has_listeners); 1335 1336 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1337 { 1338 struct netlink_sock *nlk = nlk_sk(sk); 1339 1340 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1341 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { 1342 netlink_skb_set_owner_r(skb, sk); 1343 __netlink_sendskb(sk, skb); 1344 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1345 } 1346 return -1; 1347 } 1348 1349 struct netlink_broadcast_data { 1350 struct sock *exclude_sk; 1351 struct net *net; 1352 u32 portid; 1353 u32 group; 1354 int failure; 1355 int delivery_failure; 1356 int congested; 1357 int delivered; 1358 gfp_t allocation; 1359 struct sk_buff *skb, *skb2; 1360 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); 1361 void *tx_data; 1362 }; 1363 1364 static void do_one_broadcast(struct sock *sk, 1365 struct netlink_broadcast_data *p) 1366 { 1367 struct netlink_sock *nlk = nlk_sk(sk); 1368 int val; 1369 1370 if (p->exclude_sk == sk) 1371 return; 1372 1373 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1374 !test_bit(p->group - 1, nlk->groups)) 1375 return; 1376 1377 if (!net_eq(sock_net(sk), p->net)) { 1378 if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) 1379 return; 1380 1381 if (!peernet_has_id(sock_net(sk), p->net)) 1382 return; 1383 1384 if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, 1385 CAP_NET_BROADCAST)) 1386 return; 1387 } 1388 1389 if (p->failure) { 1390 netlink_overrun(sk); 1391 return; 1392 } 1393 1394 sock_hold(sk); 1395 if (p->skb2 == NULL) { 1396 if (skb_shared(p->skb)) { 1397 p->skb2 = skb_clone(p->skb, p->allocation); 1398 } else { 1399 p->skb2 = skb_get(p->skb); 1400 /* 1401 * skb ownership may have been set when 1402 * delivered to a previous socket. 1403 */ 1404 skb_orphan(p->skb2); 1405 } 1406 } 1407 if (p->skb2 == NULL) { 1408 netlink_overrun(sk); 1409 /* Clone failed. Notify ALL listeners. 
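		 * Setting p->failure below makes every remaining socket in
		 * this walk take the netlink_overrun() path at the top of
		 * this function.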
*/ 1410 p->failure = 1; 1411 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) 1412 p->delivery_failure = 1; 1413 goto out; 1414 } 1415 if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { 1416 kfree_skb(p->skb2); 1417 p->skb2 = NULL; 1418 goto out; 1419 } 1420 if (sk_filter(sk, p->skb2)) { 1421 kfree_skb(p->skb2); 1422 p->skb2 = NULL; 1423 goto out; 1424 } 1425 NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); 1426 NETLINK_CB(p->skb2).nsid_is_set = true; 1427 val = netlink_broadcast_deliver(sk, p->skb2); 1428 if (val < 0) { 1429 netlink_overrun(sk); 1430 if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) 1431 p->delivery_failure = 1; 1432 } else { 1433 p->congested |= val; 1434 p->delivered = 1; 1435 p->skb2 = NULL; 1436 } 1437 out: 1438 sock_put(sk); 1439 } 1440 1441 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, 1442 u32 group, gfp_t allocation, 1443 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), 1444 void *filter_data) 1445 { 1446 struct net *net = sock_net(ssk); 1447 struct netlink_broadcast_data info; 1448 struct sock *sk; 1449 1450 skb = netlink_trim(skb, allocation); 1451 1452 info.exclude_sk = ssk; 1453 info.net = net; 1454 info.portid = portid; 1455 info.group = group; 1456 info.failure = 0; 1457 info.delivery_failure = 0; 1458 info.congested = 0; 1459 info.delivered = 0; 1460 info.allocation = allocation; 1461 info.skb = skb; 1462 info.skb2 = NULL; 1463 info.tx_filter = filter; 1464 info.tx_data = filter_data; 1465 1466 /* While we sleep in clone, do not allow to change socket list */ 1467 1468 netlink_lock_table(); 1469 1470 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1471 do_one_broadcast(sk, &info); 1472 1473 consume_skb(skb); 1474 1475 netlink_unlock_table(); 1476 1477 if (info.delivery_failure) { 1478 kfree_skb(info.skb2); 1479 return -ENOBUFS; 1480 } 1481 consume_skb(info.skb2); 1482 1483 if (info.delivered) { 1484 if (info.congested && gfpflags_allow_blocking(allocation)) 1485 yield(); 1486 return 0; 1487 } 1488 return -ESRCH; 1489 } 1490 EXPORT_SYMBOL(netlink_broadcast_filtered); 1491 1492 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, 1493 u32 group, gfp_t allocation) 1494 { 1495 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, 1496 NULL, NULL); 1497 } 1498 EXPORT_SYMBOL(netlink_broadcast); 1499 1500 struct netlink_set_err_data { 1501 struct sock *exclude_sk; 1502 u32 portid; 1503 u32 group; 1504 int code; 1505 }; 1506 1507 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) 1508 { 1509 struct netlink_sock *nlk = nlk_sk(sk); 1510 int ret = 0; 1511 1512 if (sk == p->exclude_sk) 1513 goto out; 1514 1515 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) 1516 goto out; 1517 1518 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1519 !test_bit(p->group - 1, nlk->groups)) 1520 goto out; 1521 1522 if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) { 1523 ret = 1; 1524 goto out; 1525 } 1526 1527 sk->sk_err = p->code; 1528 sk->sk_error_report(sk); 1529 out: 1530 return ret; 1531 } 1532 1533 /** 1534 * netlink_set_err - report error to broadcast listeners 1535 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() 1536 * @portid: the PORTID of a process that we want to skip (if any) 1537 * @group: the broadcast group that will notice the error 1538 * @code: error code, must be negative (as usual in kernelspace) 1539 * 1540 * This function returns the number of broadcast 
listeners that have set the 1541 * NETLINK_NO_ENOBUFS socket option. 1542 */ 1543 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 1544 { 1545 struct netlink_set_err_data info; 1546 struct sock *sk; 1547 int ret = 0; 1548 1549 info.exclude_sk = ssk; 1550 info.portid = portid; 1551 info.group = group; 1552 /* sk->sk_err wants a positive error value */ 1553 info.code = -code; 1554 1555 read_lock(&nl_table_lock); 1556 1557 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1558 ret += do_one_set_err(sk, &info); 1559 1560 read_unlock(&nl_table_lock); 1561 return ret; 1562 } 1563 EXPORT_SYMBOL(netlink_set_err); 1564 1565 /* must be called with netlink table grabbed */ 1566 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1567 unsigned int group, 1568 int is_new) 1569 { 1570 int old, new = !!is_new, subscriptions; 1571 1572 old = test_bit(group - 1, nlk->groups); 1573 subscriptions = nlk->subscriptions - old + new; 1574 if (new) 1575 __set_bit(group - 1, nlk->groups); 1576 else 1577 __clear_bit(group - 1, nlk->groups); 1578 netlink_update_subscriptions(&nlk->sk, subscriptions); 1579 netlink_update_listeners(&nlk->sk); 1580 } 1581 1582 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1583 char __user *optval, unsigned int optlen) 1584 { 1585 struct sock *sk = sock->sk; 1586 struct netlink_sock *nlk = nlk_sk(sk); 1587 unsigned int val = 0; 1588 int err; 1589 1590 if (level != SOL_NETLINK) 1591 return -ENOPROTOOPT; 1592 1593 if (optlen >= sizeof(int) && 1594 get_user(val, (unsigned int __user *)optval)) 1595 return -EFAULT; 1596 1597 switch (optname) { 1598 case NETLINK_PKTINFO: 1599 if (val) 1600 nlk->flags |= NETLINK_F_RECV_PKTINFO; 1601 else 1602 nlk->flags &= ~NETLINK_F_RECV_PKTINFO; 1603 err = 0; 1604 break; 1605 case NETLINK_ADD_MEMBERSHIP: 1606 case NETLINK_DROP_MEMBERSHIP: { 1607 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 1608 return -EPERM; 1609 err = netlink_realloc_groups(sk); 1610 if (err) 1611 return err; 1612 if (!val || val - 1 >= nlk->ngroups) 1613 return -EINVAL; 1614 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { 1615 err = nlk->netlink_bind(sock_net(sk), val); 1616 if (err) 1617 return err; 1618 } 1619 netlink_table_grab(); 1620 netlink_update_socket_mc(nlk, val, 1621 optname == NETLINK_ADD_MEMBERSHIP); 1622 netlink_table_ungrab(); 1623 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) 1624 nlk->netlink_unbind(sock_net(sk), val); 1625 1626 err = 0; 1627 break; 1628 } 1629 case NETLINK_BROADCAST_ERROR: 1630 if (val) 1631 nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR; 1632 else 1633 nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR; 1634 err = 0; 1635 break; 1636 case NETLINK_NO_ENOBUFS: 1637 if (val) { 1638 nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS; 1639 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 1640 wake_up_interruptible(&nlk->wait); 1641 } else { 1642 nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS; 1643 } 1644 err = 0; 1645 break; 1646 case NETLINK_LISTEN_ALL_NSID: 1647 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) 1648 return -EPERM; 1649 1650 if (val) 1651 nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; 1652 else 1653 nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; 1654 err = 0; 1655 break; 1656 case NETLINK_CAP_ACK: 1657 if (val) 1658 nlk->flags |= NETLINK_F_CAP_ACK; 1659 else 1660 nlk->flags &= ~NETLINK_F_CAP_ACK; 1661 err = 0; 1662 break; 1663 default: 1664 err = -ENOPROTOOPT; 1665 } 1666 return err; 1667 } 1668 1669 static int netlink_getsockopt(struct socket *sock, int level, 
int optname, 1670 char __user *optval, int __user *optlen) 1671 { 1672 struct sock *sk = sock->sk; 1673 struct netlink_sock *nlk = nlk_sk(sk); 1674 int len, val, err; 1675 1676 if (level != SOL_NETLINK) 1677 return -ENOPROTOOPT; 1678 1679 if (get_user(len, optlen)) 1680 return -EFAULT; 1681 if (len < 0) 1682 return -EINVAL; 1683 1684 switch (optname) { 1685 case NETLINK_PKTINFO: 1686 if (len < sizeof(int)) 1687 return -EINVAL; 1688 len = sizeof(int); 1689 val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0; 1690 if (put_user(len, optlen) || 1691 put_user(val, optval)) 1692 return -EFAULT; 1693 err = 0; 1694 break; 1695 case NETLINK_BROADCAST_ERROR: 1696 if (len < sizeof(int)) 1697 return -EINVAL; 1698 len = sizeof(int); 1699 val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0; 1700 if (put_user(len, optlen) || 1701 put_user(val, optval)) 1702 return -EFAULT; 1703 err = 0; 1704 break; 1705 case NETLINK_NO_ENOBUFS: 1706 if (len < sizeof(int)) 1707 return -EINVAL; 1708 len = sizeof(int); 1709 val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0; 1710 if (put_user(len, optlen) || 1711 put_user(val, optval)) 1712 return -EFAULT; 1713 err = 0; 1714 break; 1715 case NETLINK_LIST_MEMBERSHIPS: { 1716 int pos, idx, shift; 1717 1718 err = 0; 1719 netlink_lock_table(); 1720 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 1721 if (len - pos < sizeof(u32)) 1722 break; 1723 1724 idx = pos / sizeof(unsigned long); 1725 shift = (pos % sizeof(unsigned long)) * 8; 1726 if (put_user((u32)(nlk->groups[idx] >> shift), 1727 (u32 __user *)(optval + pos))) { 1728 err = -EFAULT; 1729 break; 1730 } 1731 } 1732 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) 1733 err = -EFAULT; 1734 netlink_unlock_table(); 1735 break; 1736 } 1737 case NETLINK_CAP_ACK: 1738 if (len < sizeof(int)) 1739 return -EINVAL; 1740 len = sizeof(int); 1741 val = nlk->flags & NETLINK_F_CAP_ACK ? 
1 : 0; 1742 if (put_user(len, optlen) || 1743 put_user(val, optval)) 1744 return -EFAULT; 1745 err = 0; 1746 break; 1747 default: 1748 err = -ENOPROTOOPT; 1749 } 1750 return err; 1751 } 1752 1753 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1754 { 1755 struct nl_pktinfo info; 1756 1757 info.group = NETLINK_CB(skb).dst_group; 1758 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1759 } 1760 1761 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, 1762 struct sk_buff *skb) 1763 { 1764 if (!NETLINK_CB(skb).nsid_is_set) 1765 return; 1766 1767 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), 1768 &NETLINK_CB(skb).nsid); 1769 } 1770 1771 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1772 { 1773 struct sock *sk = sock->sk; 1774 struct netlink_sock *nlk = nlk_sk(sk); 1775 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1776 u32 dst_portid; 1777 u32 dst_group; 1778 struct sk_buff *skb; 1779 int err; 1780 struct scm_cookie scm; 1781 u32 netlink_skb_flags = 0; 1782 1783 if (msg->msg_flags&MSG_OOB) 1784 return -EOPNOTSUPP; 1785 1786 err = scm_send(sock, msg, &scm, true); 1787 if (err < 0) 1788 return err; 1789 1790 if (msg->msg_namelen) { 1791 err = -EINVAL; 1792 if (addr->nl_family != AF_NETLINK) 1793 goto out; 1794 dst_portid = addr->nl_pid; 1795 dst_group = ffs(addr->nl_groups); 1796 err = -EPERM; 1797 if ((dst_group || dst_portid) && 1798 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1799 goto out; 1800 netlink_skb_flags |= NETLINK_SKB_DST; 1801 } else { 1802 dst_portid = nlk->dst_portid; 1803 dst_group = nlk->dst_group; 1804 } 1805 1806 if (!nlk->bound) { 1807 err = netlink_autobind(sock); 1808 if (err) 1809 goto out; 1810 } else { 1811 /* Ensure nlk is hashed and visible. 
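		 * The smp_rmb() below pairs with the smp_wmb() in
		 * netlink_insert().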
*/ 1812 smp_rmb(); 1813 } 1814 1815 err = -EMSGSIZE; 1816 if (len > sk->sk_sndbuf - 32) 1817 goto out; 1818 err = -ENOBUFS; 1819 skb = netlink_alloc_large_skb(len, dst_group); 1820 if (skb == NULL) 1821 goto out; 1822 1823 NETLINK_CB(skb).portid = nlk->portid; 1824 NETLINK_CB(skb).dst_group = dst_group; 1825 NETLINK_CB(skb).creds = scm.creds; 1826 NETLINK_CB(skb).flags = netlink_skb_flags; 1827 1828 err = -EFAULT; 1829 if (memcpy_from_msg(skb_put(skb, len), msg, len)) { 1830 kfree_skb(skb); 1831 goto out; 1832 } 1833 1834 err = security_netlink_send(sk, skb); 1835 if (err) { 1836 kfree_skb(skb); 1837 goto out; 1838 } 1839 1840 if (dst_group) { 1841 atomic_inc(&skb->users); 1842 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 1843 } 1844 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); 1845 1846 out: 1847 scm_destroy(&scm); 1848 return err; 1849 } 1850 1851 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1852 int flags) 1853 { 1854 struct scm_cookie scm; 1855 struct sock *sk = sock->sk; 1856 struct netlink_sock *nlk = nlk_sk(sk); 1857 int noblock = flags&MSG_DONTWAIT; 1858 size_t copied; 1859 struct sk_buff *skb, *data_skb; 1860 int err, ret; 1861 1862 if (flags&MSG_OOB) 1863 return -EOPNOTSUPP; 1864 1865 copied = 0; 1866 1867 skb = skb_recv_datagram(sk, flags, noblock, &err); 1868 if (skb == NULL) 1869 goto out; 1870 1871 data_skb = skb; 1872 1873 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1874 if (unlikely(skb_shinfo(skb)->frag_list)) { 1875 /* 1876 * If this skb has a frag_list, then here that means that we 1877 * will have to use the frag_list skb's data for compat tasks 1878 * and the regular skb's data for normal (non-compat) tasks. 1879 * 1880 * If we need to send the compat skb, assign it to the 1881 * 'data_skb' variable so that it will be used below for data 1882 * copying. We keep 'skb' for everything else, including 1883 * freeing both later. 1884 */ 1885 if (flags & MSG_CMSG_COMPAT) 1886 data_skb = skb_shinfo(skb)->frag_list; 1887 } 1888 #endif 1889 1890 /* Record the max length of recvmsg() calls for future allocations */ 1891 nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); 1892 nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, 1893 SKB_WITH_OVERHEAD(32768)); 1894 1895 copied = data_skb->len; 1896 if (len < copied) { 1897 msg->msg_flags |= MSG_TRUNC; 1898 copied = len; 1899 } 1900 1901 skb_reset_transport_header(data_skb); 1902 err = skb_copy_datagram_msg(data_skb, 0, msg, copied); 1903 1904 if (msg->msg_name) { 1905 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1906 addr->nl_family = AF_NETLINK; 1907 addr->nl_pad = 0; 1908 addr->nl_pid = NETLINK_CB(skb).portid; 1909 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1910 msg->msg_namelen = sizeof(*addr); 1911 } 1912 1913 if (nlk->flags & NETLINK_F_RECV_PKTINFO) 1914 netlink_cmsg_recv_pktinfo(msg, skb); 1915 if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) 1916 netlink_cmsg_listen_all_nsid(sk, msg, skb); 1917 1918 memset(&scm, 0, sizeof(scm)); 1919 scm.creds = *NETLINK_CREDS(skb); 1920 if (flags & MSG_TRUNC) 1921 copied = data_skb->len; 1922 1923 skb_free_datagram(sk, skb); 1924 1925 if (nlk->cb_running && 1926 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 1927 ret = netlink_dump(sk); 1928 if (ret) { 1929 sk->sk_err = -ret; 1930 sk->sk_error_report(sk); 1931 } 1932 } 1933 1934 scm_recv(sock, msg, &scm, flags); 1935 out: 1936 netlink_rcv_wake(sk); 1937 return err ? 
: copied; 1938 } 1939 1940 static void netlink_data_ready(struct sock *sk) 1941 { 1942 BUG(); 1943 } 1944 1945 /* 1946 * We export these functions to other modules. They provide a 1947 * complete set of kernel non-blocking support for message 1948 * queueing. 1949 */ 1950 1951 struct sock * 1952 __netlink_kernel_create(struct net *net, int unit, struct module *module, 1953 struct netlink_kernel_cfg *cfg) 1954 { 1955 struct socket *sock; 1956 struct sock *sk; 1957 struct netlink_sock *nlk; 1958 struct listeners *listeners = NULL; 1959 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; 1960 unsigned int groups; 1961 1962 BUG_ON(!nl_table); 1963 1964 if (unit < 0 || unit >= MAX_LINKS) 1965 return NULL; 1966 1967 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1968 return NULL; 1969 1970 if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) 1971 goto out_sock_release_nosk; 1972 1973 sk = sock->sk; 1974 1975 if (!cfg || cfg->groups < 32) 1976 groups = 32; 1977 else 1978 groups = cfg->groups; 1979 1980 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 1981 if (!listeners) 1982 goto out_sock_release; 1983 1984 sk->sk_data_ready = netlink_data_ready; 1985 if (cfg && cfg->input) 1986 nlk_sk(sk)->netlink_rcv = cfg->input; 1987 1988 if (netlink_insert(sk, 0)) 1989 goto out_sock_release; 1990 1991 nlk = nlk_sk(sk); 1992 nlk->flags |= NETLINK_F_KERNEL_SOCKET; 1993 1994 netlink_table_grab(); 1995 if (!nl_table[unit].registered) { 1996 nl_table[unit].groups = groups; 1997 rcu_assign_pointer(nl_table[unit].listeners, listeners); 1998 nl_table[unit].cb_mutex = cb_mutex; 1999 nl_table[unit].module = module; 2000 if (cfg) { 2001 nl_table[unit].bind = cfg->bind; 2002 nl_table[unit].unbind = cfg->unbind; 2003 nl_table[unit].flags = cfg->flags; 2004 if (cfg->compare) 2005 nl_table[unit].compare = cfg->compare; 2006 } 2007 nl_table[unit].registered = 1; 2008 } else { 2009 kfree(listeners); 2010 nl_table[unit].registered++; 2011 } 2012 netlink_table_ungrab(); 2013 return sk; 2014 2015 out_sock_release: 2016 kfree(listeners); 2017 netlink_kernel_release(sk); 2018 return NULL; 2019 2020 out_sock_release_nosk: 2021 sock_release(sock); 2022 return NULL; 2023 } 2024 EXPORT_SYMBOL(__netlink_kernel_create); 2025 2026 void 2027 netlink_kernel_release(struct sock *sk) 2028 { 2029 if (sk == NULL || sk->sk_socket == NULL) 2030 return; 2031 2032 sock_release(sk->sk_socket); 2033 } 2034 EXPORT_SYMBOL(netlink_kernel_release); 2035 2036 int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 2037 { 2038 struct listeners *new, *old; 2039 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 2040 2041 if (groups < 32) 2042 groups = 32; 2043 2044 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 2045 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); 2046 if (!new) 2047 return -ENOMEM; 2048 old = nl_deref_protected(tbl->listeners); 2049 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 2050 rcu_assign_pointer(tbl->listeners, new); 2051 2052 kfree_rcu(old, rcu); 2053 } 2054 tbl->groups = groups; 2055 2056 return 0; 2057 } 2058 2059 /** 2060 * netlink_change_ngroups - change number of multicast groups 2061 * 2062 * This changes the number of multicast groups that are available 2063 * on a certain netlink family. Note that it is not possible to 2064 * change the number of groups to below 32. Also note that it does 2065 * not implicitly call netlink_clear_multicast_users() when the 2066 * number of groups is reduced. 
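 *
 * Returns 0 on success or a negative error code (-ENOMEM) if the
 * listeners array could not be enlarged.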
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	int err;

	netlink_table_grab();
	err = __netlink_change_ngroups(sk, groups);
	netlink_table_ungrab();

	return err;
}

void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	struct sock *sk;
	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

	sk_for_each_bound(sk, &tbl->mc_list)
		netlink_update_socket_mc(nlk_sk(sk), group, 0);
}

struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
{
	struct nlmsghdr *nlh;
	int size = nlmsg_msg_size(len);

	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
	nlh->nlmsg_type = type;
	nlh->nlmsg_len = size;
	nlh->nlmsg_flags = flags;
	nlh->nlmsg_pid = portid;
	nlh->nlmsg_seq = seq;
	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
		memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
	return nlh;
}
EXPORT_SYMBOL(__nlmsg_put);

/*
 * It looks a bit ugly.
 * It would be better to create a kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	struct module *module;
	int len, err = -ENOBUFS;
	int alloc_min_size;
	int alloc_size;

	mutex_lock(nlk->cb_mutex);
	if (!nlk->cb_running) {
		err = -EINVAL;
		goto errout_skb;
	}

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto errout_skb;

	/* NLMSG_GOODSIZE is small to avoid high order allocations being
	 * required, but it makes sense to _attempt_ a 16K bytes allocation
	 * to reduce number of system calls on dump operations, if user
	 * ever provided a big enough buffer.
	 */
	cb = &nlk->cb;
	alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

	if (alloc_min_size < nlk->max_recvmsg_len) {
		alloc_size = nlk->max_recvmsg_len;
		skb = alloc_skb(alloc_size,
				(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
				__GFP_NOWARN | __GFP_NORETRY);
	}
	if (!skb) {
		alloc_size = alloc_min_size;
		skb = alloc_skb(alloc_size, GFP_KERNEL);
	}
	if (!skb)
		goto errout_skb;

	/* Trim skb to allocated size. User is expected to provide buffer as
	 * large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped at
	 * netlink_recvmsg())). dump will pack as many smaller messages as
	 * could fit within the allocated skb. skb is typically allocated
	 * with larger space than required (could be as much as near 2x the
	 * requested size with align to next power of 2 approach). Allowing
	 * dump to use the excess space makes it difficult for a user to have a
	 * reasonable static buffer based on the expected largest dump of a
	 * single netdev. The outcome is MSG_TRUNC error.
2164 */ 2165 skb_reserve(skb, skb_tailroom(skb) - alloc_size); 2166 netlink_skb_set_owner_r(skb, sk); 2167 2168 len = cb->dump(skb, cb); 2169 2170 if (len > 0) { 2171 mutex_unlock(nlk->cb_mutex); 2172 2173 if (sk_filter(sk, skb)) 2174 kfree_skb(skb); 2175 else 2176 __netlink_sendskb(sk, skb); 2177 return 0; 2178 } 2179 2180 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 2181 if (!nlh) 2182 goto errout_skb; 2183 2184 nl_dump_check_consistent(cb, nlh); 2185 2186 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 2187 2188 if (sk_filter(sk, skb)) 2189 kfree_skb(skb); 2190 else 2191 __netlink_sendskb(sk, skb); 2192 2193 if (cb->done) 2194 cb->done(cb); 2195 2196 nlk->cb_running = false; 2197 module = cb->module; 2198 skb = cb->skb; 2199 mutex_unlock(nlk->cb_mutex); 2200 module_put(module); 2201 consume_skb(skb); 2202 return 0; 2203 2204 errout_skb: 2205 mutex_unlock(nlk->cb_mutex); 2206 kfree_skb(skb); 2207 return err; 2208 } 2209 2210 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 2211 const struct nlmsghdr *nlh, 2212 struct netlink_dump_control *control) 2213 { 2214 struct netlink_callback *cb; 2215 struct sock *sk; 2216 struct netlink_sock *nlk; 2217 int ret; 2218 2219 atomic_inc(&skb->users); 2220 2221 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2222 if (sk == NULL) { 2223 ret = -ECONNREFUSED; 2224 goto error_free; 2225 } 2226 2227 nlk = nlk_sk(sk); 2228 mutex_lock(nlk->cb_mutex); 2229 /* A dump is in progress... */ 2230 if (nlk->cb_running) { 2231 ret = -EBUSY; 2232 goto error_unlock; 2233 } 2234 /* add reference of module which cb->dump belongs to */ 2235 if (!try_module_get(control->module)) { 2236 ret = -EPROTONOSUPPORT; 2237 goto error_unlock; 2238 } 2239 2240 cb = &nlk->cb; 2241 memset(cb, 0, sizeof(*cb)); 2242 cb->start = control->start; 2243 cb->dump = control->dump; 2244 cb->done = control->done; 2245 cb->nlh = nlh; 2246 cb->data = control->data; 2247 cb->module = control->module; 2248 cb->min_dump_alloc = control->min_dump_alloc; 2249 cb->skb = skb; 2250 2251 nlk->cb_running = true; 2252 2253 mutex_unlock(nlk->cb_mutex); 2254 2255 if (cb->start) 2256 cb->start(cb); 2257 2258 ret = netlink_dump(sk); 2259 sock_put(sk); 2260 2261 if (ret) 2262 return ret; 2263 2264 /* We successfully started a dump, by returning -EINTR we 2265 * signal not to send ACK even if it was requested. 2266 */ 2267 return -EINTR; 2268 2269 error_unlock: 2270 sock_put(sk); 2271 mutex_unlock(nlk->cb_mutex); 2272 error_free: 2273 kfree_skb(skb); 2274 return ret; 2275 } 2276 EXPORT_SYMBOL(__netlink_dump_start); 2277 2278 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 2279 { 2280 struct sk_buff *skb; 2281 struct nlmsghdr *rep; 2282 struct nlmsgerr *errmsg; 2283 size_t payload = sizeof(*errmsg); 2284 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); 2285 2286 /* Error messages get the original request appened, unless the user 2287 * requests to cap the error message. 
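	 *
	 * For illustration: userspace opts in to capped acks with the
	 * NETLINK_CAP_ACK socket option, e.g. something like
	 *
	 *	int one = 1;
	 *	setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &one, sizeof(one));
	 *
	 * in which case only the request's nlmsghdr is echoed back in the
	 * ack, not the full request payload.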
2288 */ 2289 if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) 2290 payload += nlmsg_len(nlh); 2291 2292 skb = nlmsg_new(payload, GFP_KERNEL); 2293 if (!skb) { 2294 struct sock *sk; 2295 2296 sk = netlink_lookup(sock_net(in_skb->sk), 2297 in_skb->sk->sk_protocol, 2298 NETLINK_CB(in_skb).portid); 2299 if (sk) { 2300 sk->sk_err = ENOBUFS; 2301 sk->sk_error_report(sk); 2302 sock_put(sk); 2303 } 2304 return; 2305 } 2306 2307 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 2308 NLMSG_ERROR, payload, 0); 2309 errmsg = nlmsg_data(rep); 2310 errmsg->error = err; 2311 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); 2312 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); 2313 } 2314 EXPORT_SYMBOL(netlink_ack); 2315 2316 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 2317 struct nlmsghdr *)) 2318 { 2319 struct nlmsghdr *nlh; 2320 int err; 2321 2322 while (skb->len >= nlmsg_total_size(0)) { 2323 int msglen; 2324 2325 nlh = nlmsg_hdr(skb); 2326 err = 0; 2327 2328 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 2329 return 0; 2330 2331 /* Only requests are handled by the kernel */ 2332 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 2333 goto ack; 2334 2335 /* Skip control messages */ 2336 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 2337 goto ack; 2338 2339 err = cb(skb, nlh); 2340 if (err == -EINTR) 2341 goto skip; 2342 2343 ack: 2344 if (nlh->nlmsg_flags & NLM_F_ACK || err) 2345 netlink_ack(skb, nlh, err); 2346 2347 skip: 2348 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 2349 if (msglen > skb->len) 2350 msglen = skb->len; 2351 skb_pull(skb, msglen); 2352 } 2353 2354 return 0; 2355 } 2356 EXPORT_SYMBOL(netlink_rcv_skb); 2357 2358 /** 2359 * nlmsg_notify - send a notification netlink message 2360 * @sk: netlink socket to use 2361 * @skb: notification message 2362 * @portid: destination netlink portid for reports or 0 2363 * @group: destination multicast group or 0 2364 * @report: 1 to report back, 0 to disable 2365 * @flags: allocation flags 2366 */ 2367 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, 2368 unsigned int group, int report, gfp_t flags) 2369 { 2370 int err = 0; 2371 2372 if (group) { 2373 int exclude_portid = 0; 2374 2375 if (report) { 2376 atomic_inc(&skb->users); 2377 exclude_portid = portid; 2378 } 2379 2380 /* errors reported via destination sk->sk_err, but propagate 2381 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ 2382 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); 2383 } 2384 2385 if (report) { 2386 int err2; 2387 2388 err2 = nlmsg_unicast(sk, skb, portid); 2389 if (!err || err == -ESRCH) 2390 err = err2; 2391 } 2392 2393 return err; 2394 } 2395 EXPORT_SYMBOL(nlmsg_notify); 2396 2397 #ifdef CONFIG_PROC_FS 2398 struct nl_seq_iter { 2399 struct seq_net_private p; 2400 struct rhashtable_iter hti; 2401 int link; 2402 }; 2403 2404 static int netlink_walk_start(struct nl_seq_iter *iter) 2405 { 2406 int err; 2407 2408 err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti, 2409 GFP_KERNEL); 2410 if (err) { 2411 iter->link = MAX_LINKS; 2412 return err; 2413 } 2414 2415 err = rhashtable_walk_start(&iter->hti); 2416 return err == -EAGAIN ? 
0 : err; 2417 } 2418 2419 static void netlink_walk_stop(struct nl_seq_iter *iter) 2420 { 2421 rhashtable_walk_stop(&iter->hti); 2422 rhashtable_walk_exit(&iter->hti); 2423 } 2424 2425 static void *__netlink_seq_next(struct seq_file *seq) 2426 { 2427 struct nl_seq_iter *iter = seq->private; 2428 struct netlink_sock *nlk; 2429 2430 do { 2431 for (;;) { 2432 int err; 2433 2434 nlk = rhashtable_walk_next(&iter->hti); 2435 2436 if (IS_ERR(nlk)) { 2437 if (PTR_ERR(nlk) == -EAGAIN) 2438 continue; 2439 2440 return nlk; 2441 } 2442 2443 if (nlk) 2444 break; 2445 2446 netlink_walk_stop(iter); 2447 if (++iter->link >= MAX_LINKS) 2448 return NULL; 2449 2450 err = netlink_walk_start(iter); 2451 if (err) 2452 return ERR_PTR(err); 2453 } 2454 } while (sock_net(&nlk->sk) != seq_file_net(seq)); 2455 2456 return nlk; 2457 } 2458 2459 static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) 2460 { 2461 struct nl_seq_iter *iter = seq->private; 2462 void *obj = SEQ_START_TOKEN; 2463 loff_t pos; 2464 int err; 2465 2466 iter->link = 0; 2467 2468 err = netlink_walk_start(iter); 2469 if (err) 2470 return ERR_PTR(err); 2471 2472 for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) 2473 obj = __netlink_seq_next(seq); 2474 2475 return obj; 2476 } 2477 2478 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2479 { 2480 ++*pos; 2481 return __netlink_seq_next(seq); 2482 } 2483 2484 static void netlink_seq_stop(struct seq_file *seq, void *v) 2485 { 2486 struct nl_seq_iter *iter = seq->private; 2487 2488 if (iter->link >= MAX_LINKS) 2489 return; 2490 2491 netlink_walk_stop(iter); 2492 } 2493 2494 2495 static int netlink_seq_show(struct seq_file *seq, void *v) 2496 { 2497 if (v == SEQ_START_TOKEN) { 2498 seq_puts(seq, 2499 "sk Eth Pid Groups " 2500 "Rmem Wmem Dump Locks Drops Inode\n"); 2501 } else { 2502 struct sock *s = v; 2503 struct netlink_sock *nlk = nlk_sk(s); 2504 2505 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n", 2506 s, 2507 s->sk_protocol, 2508 nlk->portid, 2509 nlk->groups ? 
(u32)nlk->groups[0] : 0, 2510 sk_rmem_alloc_get(s), 2511 sk_wmem_alloc_get(s), 2512 nlk->cb_running, 2513 atomic_read(&s->sk_refcnt), 2514 atomic_read(&s->sk_drops), 2515 sock_i_ino(s) 2516 ); 2517 2518 } 2519 return 0; 2520 } 2521 2522 static const struct seq_operations netlink_seq_ops = { 2523 .start = netlink_seq_start, 2524 .next = netlink_seq_next, 2525 .stop = netlink_seq_stop, 2526 .show = netlink_seq_show, 2527 }; 2528 2529 2530 static int netlink_seq_open(struct inode *inode, struct file *file) 2531 { 2532 return seq_open_net(inode, file, &netlink_seq_ops, 2533 sizeof(struct nl_seq_iter)); 2534 } 2535 2536 static const struct file_operations netlink_seq_fops = { 2537 .owner = THIS_MODULE, 2538 .open = netlink_seq_open, 2539 .read = seq_read, 2540 .llseek = seq_lseek, 2541 .release = seq_release_net, 2542 }; 2543 2544 #endif 2545 2546 int netlink_register_notifier(struct notifier_block *nb) 2547 { 2548 return blocking_notifier_chain_register(&netlink_chain, nb); 2549 } 2550 EXPORT_SYMBOL(netlink_register_notifier); 2551 2552 int netlink_unregister_notifier(struct notifier_block *nb) 2553 { 2554 return blocking_notifier_chain_unregister(&netlink_chain, nb); 2555 } 2556 EXPORT_SYMBOL(netlink_unregister_notifier); 2557 2558 static const struct proto_ops netlink_ops = { 2559 .family = PF_NETLINK, 2560 .owner = THIS_MODULE, 2561 .release = netlink_release, 2562 .bind = netlink_bind, 2563 .connect = netlink_connect, 2564 .socketpair = sock_no_socketpair, 2565 .accept = sock_no_accept, 2566 .getname = netlink_getname, 2567 .poll = datagram_poll, 2568 .ioctl = netlink_ioctl, 2569 .listen = sock_no_listen, 2570 .shutdown = sock_no_shutdown, 2571 .setsockopt = netlink_setsockopt, 2572 .getsockopt = netlink_getsockopt, 2573 .sendmsg = netlink_sendmsg, 2574 .recvmsg = netlink_recvmsg, 2575 .mmap = sock_no_mmap, 2576 .sendpage = sock_no_sendpage, 2577 }; 2578 2579 static const struct net_proto_family netlink_family_ops = { 2580 .family = PF_NETLINK, 2581 .create = netlink_create, 2582 .owner = THIS_MODULE, /* for consistency 8) */ 2583 }; 2584 2585 static int __net_init netlink_net_init(struct net *net) 2586 { 2587 #ifdef CONFIG_PROC_FS 2588 if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops)) 2589 return -ENOMEM; 2590 #endif 2591 return 0; 2592 } 2593 2594 static void __net_exit netlink_net_exit(struct net *net) 2595 { 2596 #ifdef CONFIG_PROC_FS 2597 remove_proc_entry("netlink", net->proc_net); 2598 #endif 2599 } 2600 2601 static void __init netlink_add_usersock_entry(void) 2602 { 2603 struct listeners *listeners; 2604 int groups = 32; 2605 2606 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2607 if (!listeners) 2608 panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); 2609 2610 netlink_table_grab(); 2611 2612 nl_table[NETLINK_USERSOCK].groups = groups; 2613 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); 2614 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2615 nl_table[NETLINK_USERSOCK].registered = 1; 2616 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; 2617 2618 netlink_table_ungrab(); 2619 } 2620 2621 static struct pernet_operations __net_initdata netlink_net_ops = { 2622 .init = netlink_net_init, 2623 .exit = netlink_net_exit, 2624 }; 2625 2626 static inline u32 netlink_hash(const void *data, u32 len, u32 seed) 2627 { 2628 const struct netlink_sock *nlk = data; 2629 struct netlink_compare_arg arg; 2630 2631 netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); 2632 return jhash2((u32 *)&arg, 
netlink_compare_arg_len / sizeof(u32), seed); 2633 } 2634 2635 static const struct rhashtable_params netlink_rhashtable_params = { 2636 .head_offset = offsetof(struct netlink_sock, node), 2637 .key_len = netlink_compare_arg_len, 2638 .obj_hashfn = netlink_hash, 2639 .obj_cmpfn = netlink_compare, 2640 .automatic_shrinking = true, 2641 }; 2642 2643 static int __init netlink_proto_init(void) 2644 { 2645 int i; 2646 int err = proto_register(&netlink_proto, 0); 2647 2648 if (err != 0) 2649 goto out; 2650 2651 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); 2652 2653 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 2654 if (!nl_table) 2655 goto panic; 2656 2657 for (i = 0; i < MAX_LINKS; i++) { 2658 if (rhashtable_init(&nl_table[i].hash, 2659 &netlink_rhashtable_params) < 0) { 2660 while (--i > 0) 2661 rhashtable_destroy(&nl_table[i].hash); 2662 kfree(nl_table); 2663 goto panic; 2664 } 2665 } 2666 2667 INIT_LIST_HEAD(&netlink_tap_all); 2668 2669 netlink_add_usersock_entry(); 2670 2671 sock_register(&netlink_family_ops); 2672 register_pernet_subsys(&netlink_net_ops); 2673 /* The netlink device handler may be needed early. */ 2674 rtnetlink_init(); 2675 out: 2676 return err; 2677 panic: 2678 panic("netlink_init: Cannot allocate nl_table\n"); 2679 } 2680 2681 core_initcall(netlink_proto_init); 2682
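
/*
 * Usage sketch (illustrative only; the demo_* names and NETLINK_DEMO_UNIT
 * are hypothetical, a real caller picks one of the NETLINK_* units from
 * <uapi/linux/netlink.h> and its own handlers): a subsystem creates its
 * kernel-side socket through the API exported above roughly as follows.
 *
 *	static struct sock *demo_nl_sock;
 *
 *	static int demo_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		// handle one request message, return 0 or a -errno
 *		return 0;
 *	}
 *
 *	static void demo_nl_rcv(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, demo_nl_rcv_msg);
 *	}
 *
 *	static int __init demo_nl_init(void)
 *	{
 *		struct netlink_kernel_cfg cfg = {
 *			.groups	= 1,
 *			.input	= demo_nl_rcv,
 *		};
 *
 *		demo_nl_sock = netlink_kernel_create(&init_net,
 *						     NETLINK_DEMO_UNIT, &cfg);
 *		return demo_nl_sock ? 0 : -ENOMEM;
 *	}
 *
 * netlink_kernel_create() is the inline wrapper that passes THIS_MODULE to
 * __netlink_kernel_create(); requests arriving on the socket are handed to
 * cfg.input, and netlink_rcv_skb() performs the per-message iteration and
 * acking implemented above.
 */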