1 /* 2 * NETLINK Kernel-user communication protocol. 3 * 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith 13 * added netlink_proto_exit 14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 15 * use nlk_sk, as sk->protinfo is on a diet 8) 16 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> 17 * - inc module use count of module that owns 18 * the kernel socket in case userspace opens 19 * socket of same protocol 20 * - remove all module support, since netlink is 21 * mandatory if CONFIG_NET=y these days 22 */ 23 24 #include <linux/module.h> 25 26 #include <linux/capability.h> 27 #include <linux/kernel.h> 28 #include <linux/init.h> 29 #include <linux/signal.h> 30 #include <linux/sched.h> 31 #include <linux/errno.h> 32 #include <linux/string.h> 33 #include <linux/stat.h> 34 #include <linux/socket.h> 35 #include <linux/un.h> 36 #include <linux/fcntl.h> 37 #include <linux/termios.h> 38 #include <linux/sockios.h> 39 #include <linux/net.h> 40 #include <linux/fs.h> 41 #include <linux/slab.h> 42 #include <asm/uaccess.h> 43 #include <linux/skbuff.h> 44 #include <linux/netdevice.h> 45 #include <linux/rtnetlink.h> 46 #include <linux/proc_fs.h> 47 #include <linux/seq_file.h> 48 #include <linux/notifier.h> 49 #include <linux/security.h> 50 #include <linux/jhash.h> 51 #include <linux/jiffies.h> 52 #include <linux/random.h> 53 #include <linux/bitops.h> 54 #include <linux/mm.h> 55 #include <linux/types.h> 56 #include <linux/audit.h> 57 #include <linux/mutex.h> 58 59 #include <net/net_namespace.h> 60 #include <net/sock.h> 61 #include <net/scm.h> 62 #include <net/netlink.h> 63 64 #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) 65 #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) 66 67 struct netlink_sock { 68 /* struct sock has to be the first member of netlink_sock */ 69 struct sock sk; 70 u32 pid; 71 u32 dst_pid; 72 u32 dst_group; 73 u32 flags; 74 u32 subscriptions; 75 u32 ngroups; 76 unsigned long *groups; 77 unsigned long state; 78 wait_queue_head_t wait; 79 struct netlink_callback *cb; 80 struct mutex *cb_mutex; 81 struct mutex cb_def_mutex; 82 void (*netlink_rcv)(struct sk_buff *skb); 83 struct module *module; 84 }; 85 86 struct listeners_rcu_head { 87 struct rcu_head rcu_head; 88 void *ptr; 89 }; 90 91 #define NETLINK_KERNEL_SOCKET 0x1 92 #define NETLINK_RECV_PKTINFO 0x2 93 #define NETLINK_BROADCAST_SEND_ERROR 0x4 94 #define NETLINK_RECV_NO_ENOBUFS 0x8 95 96 static inline struct netlink_sock *nlk_sk(struct sock *sk) 97 { 98 return container_of(sk, struct netlink_sock, sk); 99 } 100 101 static inline int netlink_is_kernel(struct sock *sk) 102 { 103 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; 104 } 105 106 struct nl_pid_hash { 107 struct hlist_head *table; 108 unsigned long rehash_time; 109 110 unsigned int mask; 111 unsigned int shift; 112 113 unsigned int entries; 114 unsigned int max_shift; 115 116 u32 rnd; 117 }; 118 119 struct netlink_table { 120 struct nl_pid_hash hash; 121 struct hlist_head mc_list; 122 unsigned long *listeners; 123 unsigned int nl_nonroot; 124 unsigned int groups; 125 struct mutex *cb_mutex; 126 struct module *module; 127 int registered; 128 }; 129 130 static struct netlink_table *nl_table; 131 132 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 133 134 static int netlink_dump(struct sock *sk); 135 static void netlink_destroy_callback(struct netlink_callback *cb); 136 137 static DEFINE_RWLOCK(nl_table_lock); 138 static atomic_t nl_table_users = ATOMIC_INIT(0); 139 140 static ATOMIC_NOTIFIER_HEAD(netlink_chain); 141 142 static u32 netlink_group_mask(u32 group) 143 { 144 return group ? 1 << (group - 1) : 0; 145 } 146 147 static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) 148 { 149 return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; 150 } 151 152 static void netlink_sock_destruct(struct sock *sk) 153 { 154 struct netlink_sock *nlk = nlk_sk(sk); 155 156 if (nlk->cb) { 157 if (nlk->cb->done) 158 nlk->cb->done(nlk->cb); 159 netlink_destroy_callback(nlk->cb); 160 } 161 162 skb_queue_purge(&sk->sk_receive_queue); 163 164 if (!sock_flag(sk, SOCK_DEAD)) { 165 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 166 return; 167 } 168 169 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 170 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 171 WARN_ON(nlk_sk(sk)->groups); 172 } 173 174 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 175 * SMP. Look, when several writers sleep and reader wakes them up, all but one 176 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 177 * this, _but_ remember, it adds useless work on UP machines. 178 */ 179 180 void netlink_table_grab(void) 181 __acquires(nl_table_lock) 182 { 183 might_sleep(); 184 185 write_lock_irq(&nl_table_lock); 186 187 if (atomic_read(&nl_table_users)) { 188 DECLARE_WAITQUEUE(wait, current); 189 190 add_wait_queue_exclusive(&nl_table_wait, &wait); 191 for (;;) { 192 set_current_state(TASK_UNINTERRUPTIBLE); 193 if (atomic_read(&nl_table_users) == 0) 194 break; 195 write_unlock_irq(&nl_table_lock); 196 schedule(); 197 write_lock_irq(&nl_table_lock); 198 } 199 200 __set_current_state(TASK_RUNNING); 201 remove_wait_queue(&nl_table_wait, &wait); 202 } 203 } 204 205 void netlink_table_ungrab(void) 206 __releases(nl_table_lock) 207 { 208 write_unlock_irq(&nl_table_lock); 209 wake_up(&nl_table_wait); 210 } 211 212 static inline void 213 netlink_lock_table(void) 214 { 215 /* read_lock() synchronizes us to netlink_table_grab */ 216 217 read_lock(&nl_table_lock); 218 atomic_inc(&nl_table_users); 219 read_unlock(&nl_table_lock); 220 } 221 222 static inline void 223 netlink_unlock_table(void) 224 { 225 if (atomic_dec_and_test(&nl_table_users)) 226 wake_up(&nl_table_wait); 227 } 228 229 static inline struct sock *netlink_lookup(struct net *net, int protocol, 230 u32 pid) 231 { 232 struct nl_pid_hash *hash = &nl_table[protocol].hash; 233 struct hlist_head *head; 234 struct sock *sk; 235 struct hlist_node *node; 236 237 read_lock(&nl_table_lock); 238 head = nl_pid_hashfn(hash, pid); 239 sk_for_each(sk, node, head) { 240 if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { 241 sock_hold(sk); 242 goto found; 243 } 244 } 245 sk = NULL; 246 found: 247 read_unlock(&nl_table_lock); 248 return sk; 249 } 250 251 static inline struct hlist_head *nl_pid_hash_zalloc(size_t size) 252 { 253 if (size <= PAGE_SIZE) 254 return kzalloc(size, GFP_ATOMIC); 255 else 256 return (struct hlist_head *) 257 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 258 get_order(size)); 259 } 260 261 static inline void nl_pid_hash_free(struct hlist_head *table, size_t size) 262 { 263 if (size <= PAGE_SIZE) 264 kfree(table); 265 else 266 free_pages((unsigned long)table, get_order(size)); 267 } 268 269 static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) 270 { 271 unsigned int omask, mask, shift; 272 size_t osize, size; 273 struct hlist_head *otable, *table; 274 int i; 275 276 omask = mask = hash->mask; 277 osize = size = (mask + 1) * sizeof(*table); 278 shift = hash->shift; 279 280 if (grow) { 281 if (++shift > hash->max_shift) 282 return 0; 283 mask = mask * 2 + 1; 284 size *= 2; 285 } 286 287 table = nl_pid_hash_zalloc(size); 288 if (!table) 289 return 0; 290 291 otable = hash->table; 292 hash->table = table; 293 hash->mask = mask; 294 hash->shift = shift; 295 get_random_bytes(&hash->rnd, sizeof(hash->rnd)); 296 297 for (i = 0; i <= omask; i++) { 298 struct sock *sk; 299 struct hlist_node *node, *tmp; 300 301 sk_for_each_safe(sk, node, tmp, &otable[i]) 302 __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); 303 } 304 305 nl_pid_hash_free(otable, osize); 306 hash->rehash_time = jiffies + 10 * 60 * HZ; 307 return 1; 308 } 309 310 static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) 311 { 312 int avg = hash->entries >> hash->shift; 313 314 if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) 315 return 1; 316 317 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { 318 nl_pid_hash_rehash(hash, 0); 319 return 1; 320 } 321 322 return 0; 323 } 324 325 static const struct proto_ops netlink_ops; 326 327 static void 328 netlink_update_listeners(struct sock *sk) 329 { 330 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 331 struct hlist_node *node; 332 unsigned long mask; 333 unsigned int i; 334 335 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 336 mask = 0; 337 sk_for_each_bound(sk, node, &tbl->mc_list) { 338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 339 mask |= nlk_sk(sk)->groups[i]; 340 } 341 tbl->listeners[i] = mask; 342 } 343 /* this function is only called with the netlink table "grabbed", which 344 * makes sure updates are visible before bind or setsockopt return. */ 345 } 346 347 static int netlink_insert(struct sock *sk, struct net *net, u32 pid) 348 { 349 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; 350 struct hlist_head *head; 351 int err = -EADDRINUSE; 352 struct sock *osk; 353 struct hlist_node *node; 354 int len; 355 356 netlink_table_grab(); 357 head = nl_pid_hashfn(hash, pid); 358 len = 0; 359 sk_for_each(osk, node, head) { 360 if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) 361 break; 362 len++; 363 } 364 if (node) 365 goto err; 366 367 err = -EBUSY; 368 if (nlk_sk(sk)->pid) 369 goto err; 370 371 err = -ENOMEM; 372 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) 373 goto err; 374 375 if (len && nl_pid_hash_dilute(hash, len)) 376 head = nl_pid_hashfn(hash, pid); 377 hash->entries++; 378 nlk_sk(sk)->pid = pid; 379 sk_add_node(sk, head); 380 err = 0; 381 382 err: 383 netlink_table_ungrab(); 384 return err; 385 } 386 387 static void netlink_remove(struct sock *sk) 388 { 389 netlink_table_grab(); 390 if (sk_del_node_init(sk)) 391 nl_table[sk->sk_protocol].hash.entries--; 392 if (nlk_sk(sk)->subscriptions) 393 __sk_del_bind_node(sk); 394 netlink_table_ungrab(); 395 } 396 397 static struct proto netlink_proto = { 398 .name = "NETLINK", 399 .owner = THIS_MODULE, 400 .obj_size = sizeof(struct netlink_sock), 401 }; 402 403 static int __netlink_create(struct net *net, struct socket *sock, 404 struct mutex *cb_mutex, int protocol) 405 { 406 struct sock *sk; 407 struct netlink_sock *nlk; 408 409 sock->ops = &netlink_ops; 410 411 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); 412 if (!sk) 413 return -ENOMEM; 414 415 sock_init_data(sock, sk); 416 417 nlk = nlk_sk(sk); 418 if (cb_mutex) 419 nlk->cb_mutex = cb_mutex; 420 else { 421 nlk->cb_mutex = &nlk->cb_def_mutex; 422 mutex_init(nlk->cb_mutex); 423 } 424 init_waitqueue_head(&nlk->wait); 425 426 sk->sk_destruct = netlink_sock_destruct; 427 sk->sk_protocol = protocol; 428 return 0; 429 } 430 431 static int netlink_create(struct net *net, struct socket *sock, int protocol, 432 int kern) 433 { 434 struct module *module = NULL; 435 struct mutex *cb_mutex; 436 struct netlink_sock *nlk; 437 int err = 0; 438 439 sock->state = SS_UNCONNECTED; 440 441 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 442 return -ESOCKTNOSUPPORT; 443 444 if (protocol < 0 || protocol >= MAX_LINKS) 445 return -EPROTONOSUPPORT; 446 447 netlink_lock_table(); 448 #ifdef CONFIG_MODULES 449 if (!nl_table[protocol].registered) { 450 netlink_unlock_table(); 451 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 452 netlink_lock_table(); 453 } 454 #endif 455 if (nl_table[protocol].registered && 456 try_module_get(nl_table[protocol].module)) 457 module = nl_table[protocol].module; 458 else 459 err = -EPROTONOSUPPORT; 460 cb_mutex = nl_table[protocol].cb_mutex; 461 netlink_unlock_table(); 462 463 if (err < 0) 464 goto out; 465 466 err = __netlink_create(net, sock, cb_mutex, protocol); 467 if (err < 0) 468 goto out_module; 469 470 local_bh_disable(); 471 sock_prot_inuse_add(net, &netlink_proto, 1); 472 local_bh_enable(); 473 474 nlk = nlk_sk(sock->sk); 475 nlk->module = module; 476 out: 477 return err; 478 479 out_module: 480 module_put(module); 481 goto out; 482 } 483 484 static int netlink_release(struct socket *sock) 485 { 486 struct sock *sk = sock->sk; 487 struct netlink_sock *nlk; 488 489 if (!sk) 490 return 0; 491 492 netlink_remove(sk); 493 sock_orphan(sk); 494 nlk = nlk_sk(sk); 495 496 /* 497 * OK. Socket is unlinked, any packets that arrive now 498 * will be purged. 499 */ 500 501 sock->sk = NULL; 502 wake_up_interruptible_all(&nlk->wait); 503 504 skb_queue_purge(&sk->sk_write_queue); 505 506 if (nlk->pid) { 507 struct netlink_notify n = { 508 .net = sock_net(sk), 509 .protocol = sk->sk_protocol, 510 .pid = nlk->pid, 511 }; 512 atomic_notifier_call_chain(&netlink_chain, 513 NETLINK_URELEASE, &n); 514 } 515 516 module_put(nlk->module); 517 518 netlink_table_grab(); 519 if (netlink_is_kernel(sk)) { 520 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 521 if (--nl_table[sk->sk_protocol].registered == 0) { 522 kfree(nl_table[sk->sk_protocol].listeners); 523 nl_table[sk->sk_protocol].module = NULL; 524 nl_table[sk->sk_protocol].registered = 0; 525 } 526 } else if (nlk->subscriptions) 527 netlink_update_listeners(sk); 528 netlink_table_ungrab(); 529 530 kfree(nlk->groups); 531 nlk->groups = NULL; 532 533 local_bh_disable(); 534 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 535 local_bh_enable(); 536 sock_put(sk); 537 return 0; 538 } 539 540 static int netlink_autobind(struct socket *sock) 541 { 542 struct sock *sk = sock->sk; 543 struct net *net = sock_net(sk); 544 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; 545 struct hlist_head *head; 546 struct sock *osk; 547 struct hlist_node *node; 548 s32 pid = current->tgid; 549 int err; 550 static s32 rover = -4097; 551 552 retry: 553 cond_resched(); 554 netlink_table_grab(); 555 head = nl_pid_hashfn(hash, pid); 556 sk_for_each(osk, node, head) { 557 if (!net_eq(sock_net(osk), net)) 558 continue; 559 if (nlk_sk(osk)->pid == pid) { 560 /* Bind collision, search negative pid values. */ 561 pid = rover--; 562 if (rover > -4097) 563 rover = -4097; 564 netlink_table_ungrab(); 565 goto retry; 566 } 567 } 568 netlink_table_ungrab(); 569 570 err = netlink_insert(sk, net, pid); 571 if (err == -EADDRINUSE) 572 goto retry; 573 574 /* If 2 threads race to autobind, that is fine. */ 575 if (err == -EBUSY) 576 err = 0; 577 578 return err; 579 } 580 581 static inline int netlink_capable(struct socket *sock, unsigned int flag) 582 { 583 return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || 584 capable(CAP_NET_ADMIN); 585 } 586 587 static void 588 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 589 { 590 struct netlink_sock *nlk = nlk_sk(sk); 591 592 if (nlk->subscriptions && !subscriptions) 593 __sk_del_bind_node(sk); 594 else if (!nlk->subscriptions && subscriptions) 595 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 596 nlk->subscriptions = subscriptions; 597 } 598 599 static int netlink_realloc_groups(struct sock *sk) 600 { 601 struct netlink_sock *nlk = nlk_sk(sk); 602 unsigned int groups; 603 unsigned long *new_groups; 604 int err = 0; 605 606 netlink_table_grab(); 607 608 groups = nl_table[sk->sk_protocol].groups; 609 if (!nl_table[sk->sk_protocol].registered) { 610 err = -ENOENT; 611 goto out_unlock; 612 } 613 614 if (nlk->ngroups >= groups) 615 goto out_unlock; 616 617 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 618 if (new_groups == NULL) { 619 err = -ENOMEM; 620 goto out_unlock; 621 } 622 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 623 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 624 625 nlk->groups = new_groups; 626 nlk->ngroups = groups; 627 out_unlock: 628 netlink_table_ungrab(); 629 return err; 630 } 631 632 static int netlink_bind(struct socket *sock, struct sockaddr *addr, 633 int addr_len) 634 { 635 struct sock *sk = sock->sk; 636 struct net *net = sock_net(sk); 637 struct netlink_sock *nlk = nlk_sk(sk); 638 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 639 int err; 640 641 if (nladdr->nl_family != AF_NETLINK) 642 return -EINVAL; 643 644 /* Only superuser is allowed to listen multicasts */ 645 if (nladdr->nl_groups) { 646 if (!netlink_capable(sock, NL_NONROOT_RECV)) 647 return -EPERM; 648 err = netlink_realloc_groups(sk); 649 if (err) 650 return err; 651 } 652 653 if (nlk->pid) { 654 if (nladdr->nl_pid != nlk->pid) 655 return -EINVAL; 656 } else { 657 err = nladdr->nl_pid ? 658 netlink_insert(sk, net, nladdr->nl_pid) : 659 netlink_autobind(sock); 660 if (err) 661 return err; 662 } 663 664 if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) 665 return 0; 666 667 netlink_table_grab(); 668 netlink_update_subscriptions(sk, nlk->subscriptions + 669 hweight32(nladdr->nl_groups) - 670 hweight32(nlk->groups[0])); 671 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; 672 netlink_update_listeners(sk); 673 netlink_table_ungrab(); 674 675 return 0; 676 } 677 678 static int netlink_connect(struct socket *sock, struct sockaddr *addr, 679 int alen, int flags) 680 { 681 int err = 0; 682 struct sock *sk = sock->sk; 683 struct netlink_sock *nlk = nlk_sk(sk); 684 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 685 686 if (addr->sa_family == AF_UNSPEC) { 687 sk->sk_state = NETLINK_UNCONNECTED; 688 nlk->dst_pid = 0; 689 nlk->dst_group = 0; 690 return 0; 691 } 692 if (addr->sa_family != AF_NETLINK) 693 return -EINVAL; 694 695 /* Only superuser is allowed to send multicasts */ 696 if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) 697 return -EPERM; 698 699 if (!nlk->pid) 700 err = netlink_autobind(sock); 701 702 if (err == 0) { 703 sk->sk_state = NETLINK_CONNECTED; 704 nlk->dst_pid = nladdr->nl_pid; 705 nlk->dst_group = ffs(nladdr->nl_groups); 706 } 707 708 return err; 709 } 710 711 static int netlink_getname(struct socket *sock, struct sockaddr *addr, 712 int *addr_len, int peer) 713 { 714 struct sock *sk = sock->sk; 715 struct netlink_sock *nlk = nlk_sk(sk); 716 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); 717 718 nladdr->nl_family = AF_NETLINK; 719 nladdr->nl_pad = 0; 720 *addr_len = sizeof(*nladdr); 721 722 if (peer) { 723 nladdr->nl_pid = nlk->dst_pid; 724 nladdr->nl_groups = netlink_group_mask(nlk->dst_group); 725 } else { 726 nladdr->nl_pid = nlk->pid; 727 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; 728 } 729 return 0; 730 } 731 732 static void netlink_overrun(struct sock *sk) 733 { 734 struct netlink_sock *nlk = nlk_sk(sk); 735 736 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { 737 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { 738 sk->sk_err = ENOBUFS; 739 sk->sk_error_report(sk); 740 } 741 } 742 atomic_inc(&sk->sk_drops); 743 } 744 745 static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) 746 { 747 struct sock *sock; 748 struct netlink_sock *nlk; 749 750 sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); 751 if (!sock) 752 return ERR_PTR(-ECONNREFUSED); 753 754 /* Don't bother queuing skb if kernel socket has no input function */ 755 nlk = nlk_sk(sock); 756 if (sock->sk_state == NETLINK_CONNECTED && 757 nlk->dst_pid != nlk_sk(ssk)->pid) { 758 sock_put(sock); 759 return ERR_PTR(-ECONNREFUSED); 760 } 761 return sock; 762 } 763 764 struct sock *netlink_getsockbyfilp(struct file *filp) 765 { 766 struct inode *inode = filp->f_path.dentry->d_inode; 767 struct sock *sock; 768 769 if (!S_ISSOCK(inode->i_mode)) 770 return ERR_PTR(-ENOTSOCK); 771 772 sock = SOCKET_I(inode)->sk; 773 if (sock->sk_family != AF_NETLINK) 774 return ERR_PTR(-EINVAL); 775 776 sock_hold(sock); 777 return sock; 778 } 779 780 /* 781 * Attach a skb to a netlink socket. 782 * The caller must hold a reference to the destination socket. On error, the 783 * reference is dropped. The skb is not send to the destination, just all 784 * all error checks are performed and memory in the queue is reserved. 785 * Return values: 786 * < 0: error. skb freed, reference to sock dropped. 787 * 0: continue 788 * 1: repeat lookup - reference dropped while waiting for socket memory. 789 */ 790 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 791 long *timeo, struct sock *ssk) 792 { 793 struct netlink_sock *nlk; 794 795 nlk = nlk_sk(sk); 796 797 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 798 test_bit(0, &nlk->state)) { 799 DECLARE_WAITQUEUE(wait, current); 800 if (!*timeo) { 801 if (!ssk || netlink_is_kernel(ssk)) 802 netlink_overrun(sk); 803 sock_put(sk); 804 kfree_skb(skb); 805 return -EAGAIN; 806 } 807 808 __set_current_state(TASK_INTERRUPTIBLE); 809 add_wait_queue(&nlk->wait, &wait); 810 811 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 812 test_bit(0, &nlk->state)) && 813 !sock_flag(sk, SOCK_DEAD)) 814 *timeo = schedule_timeout(*timeo); 815 816 __set_current_state(TASK_RUNNING); 817 remove_wait_queue(&nlk->wait, &wait); 818 sock_put(sk); 819 820 if (signal_pending(current)) { 821 kfree_skb(skb); 822 return sock_intr_errno(*timeo); 823 } 824 return 1; 825 } 826 skb_set_owner_r(skb, sk); 827 return 0; 828 } 829 830 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 831 { 832 int len = skb->len; 833 834 skb_queue_tail(&sk->sk_receive_queue, skb); 835 sk->sk_data_ready(sk, len); 836 sock_put(sk); 837 return len; 838 } 839 840 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 841 { 842 kfree_skb(skb); 843 sock_put(sk); 844 } 845 846 static inline struct sk_buff *netlink_trim(struct sk_buff *skb, 847 gfp_t allocation) 848 { 849 int delta; 850 851 skb_orphan(skb); 852 853 delta = skb->end - skb->tail; 854 if (delta * 2 < skb->truesize) 855 return skb; 856 857 if (skb_shared(skb)) { 858 struct sk_buff *nskb = skb_clone(skb, allocation); 859 if (!nskb) 860 return skb; 861 kfree_skb(skb); 862 skb = nskb; 863 } 864 865 if (!pskb_expand_head(skb, 0, -delta, allocation)) 866 skb->truesize -= delta; 867 868 return skb; 869 } 870 871 static inline void netlink_rcv_wake(struct sock *sk) 872 { 873 struct netlink_sock *nlk = nlk_sk(sk); 874 875 if (skb_queue_empty(&sk->sk_receive_queue)) 876 clear_bit(0, &nlk->state); 877 if (!test_bit(0, &nlk->state)) 878 wake_up_interruptible(&nlk->wait); 879 } 880 881 static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) 882 { 883 int ret; 884 struct netlink_sock *nlk = nlk_sk(sk); 885 886 ret = -ECONNREFUSED; 887 if (nlk->netlink_rcv != NULL) { 888 ret = skb->len; 889 skb_set_owner_r(skb, sk); 890 nlk->netlink_rcv(skb); 891 } 892 kfree_skb(skb); 893 sock_put(sk); 894 return ret; 895 } 896 897 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 898 u32 pid, int nonblock) 899 { 900 struct sock *sk; 901 int err; 902 long timeo; 903 904 skb = netlink_trim(skb, gfp_any()); 905 906 timeo = sock_sndtimeo(ssk, nonblock); 907 retry: 908 sk = netlink_getsockbypid(ssk, pid); 909 if (IS_ERR(sk)) { 910 kfree_skb(skb); 911 return PTR_ERR(sk); 912 } 913 if (netlink_is_kernel(sk)) 914 return netlink_unicast_kernel(sk, skb); 915 916 if (sk_filter(sk, skb)) { 917 err = skb->len; 918 kfree_skb(skb); 919 sock_put(sk); 920 return err; 921 } 922 923 err = netlink_attachskb(sk, skb, &timeo, ssk); 924 if (err == 1) 925 goto retry; 926 if (err) 927 return err; 928 929 return netlink_sendskb(sk, skb); 930 } 931 EXPORT_SYMBOL(netlink_unicast); 932 933 int netlink_has_listeners(struct sock *sk, unsigned int group) 934 { 935 int res = 0; 936 unsigned long *listeners; 937 938 BUG_ON(!netlink_is_kernel(sk)); 939 940 rcu_read_lock(); 941 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 942 943 if (group - 1 < nl_table[sk->sk_protocol].groups) 944 res = test_bit(group - 1, listeners); 945 946 rcu_read_unlock(); 947 948 return res; 949 } 950 EXPORT_SYMBOL_GPL(netlink_has_listeners); 951 952 static inline int netlink_broadcast_deliver(struct sock *sk, 953 struct sk_buff *skb) 954 { 955 struct netlink_sock *nlk = nlk_sk(sk); 956 957 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 958 !test_bit(0, &nlk->state)) { 959 skb_set_owner_r(skb, sk); 960 skb_queue_tail(&sk->sk_receive_queue, skb); 961 sk->sk_data_ready(sk, skb->len); 962 return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf; 963 } 964 return -1; 965 } 966 967 struct netlink_broadcast_data { 968 struct sock *exclude_sk; 969 struct net *net; 970 u32 pid; 971 u32 group; 972 int failure; 973 int delivery_failure; 974 int congested; 975 int delivered; 976 gfp_t allocation; 977 struct sk_buff *skb, *skb2; 978 }; 979 980 static inline int do_one_broadcast(struct sock *sk, 981 struct netlink_broadcast_data *p) 982 { 983 struct netlink_sock *nlk = nlk_sk(sk); 984 int val; 985 986 if (p->exclude_sk == sk) 987 goto out; 988 989 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || 990 !test_bit(p->group - 1, nlk->groups)) 991 goto out; 992 993 if (!net_eq(sock_net(sk), p->net)) 994 goto out; 995 996 if (p->failure) { 997 netlink_overrun(sk); 998 goto out; 999 } 1000 1001 sock_hold(sk); 1002 if (p->skb2 == NULL) { 1003 if (skb_shared(p->skb)) { 1004 p->skb2 = skb_clone(p->skb, p->allocation); 1005 } else { 1006 p->skb2 = skb_get(p->skb); 1007 /* 1008 * skb ownership may have been set when 1009 * delivered to a previous socket. 1010 */ 1011 skb_orphan(p->skb2); 1012 } 1013 } 1014 if (p->skb2 == NULL) { 1015 netlink_overrun(sk); 1016 /* Clone failed. Notify ALL listeners. */ 1017 p->failure = 1; 1018 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1019 p->delivery_failure = 1; 1020 } else if (sk_filter(sk, p->skb2)) { 1021 kfree_skb(p->skb2); 1022 p->skb2 = NULL; 1023 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1024 netlink_overrun(sk); 1025 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1026 p->delivery_failure = 1; 1027 } else { 1028 p->congested |= val; 1029 p->delivered = 1; 1030 p->skb2 = NULL; 1031 } 1032 sock_put(sk); 1033 1034 out: 1035 return 0; 1036 } 1037 1038 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, 1039 u32 group, gfp_t allocation) 1040 { 1041 struct net *net = sock_net(ssk); 1042 struct netlink_broadcast_data info; 1043 struct hlist_node *node; 1044 struct sock *sk; 1045 1046 skb = netlink_trim(skb, allocation); 1047 1048 info.exclude_sk = ssk; 1049 info.net = net; 1050 info.pid = pid; 1051 info.group = group; 1052 info.failure = 0; 1053 info.delivery_failure = 0; 1054 info.congested = 0; 1055 info.delivered = 0; 1056 info.allocation = allocation; 1057 info.skb = skb; 1058 info.skb2 = NULL; 1059 1060 /* While we sleep in clone, do not allow to change socket list */ 1061 1062 netlink_lock_table(); 1063 1064 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) 1065 do_one_broadcast(sk, &info); 1066 1067 kfree_skb(skb); 1068 1069 netlink_unlock_table(); 1070 1071 kfree_skb(info.skb2); 1072 1073 if (info.delivery_failure) 1074 return -ENOBUFS; 1075 1076 if (info.delivered) { 1077 if (info.congested && (allocation & __GFP_WAIT)) 1078 yield(); 1079 return 0; 1080 } 1081 return -ESRCH; 1082 } 1083 EXPORT_SYMBOL(netlink_broadcast); 1084 1085 struct netlink_set_err_data { 1086 struct sock *exclude_sk; 1087 u32 pid; 1088 u32 group; 1089 int code; 1090 }; 1091 1092 static inline int do_one_set_err(struct sock *sk, 1093 struct netlink_set_err_data *p) 1094 { 1095 struct netlink_sock *nlk = nlk_sk(sk); 1096 1097 if (sk == p->exclude_sk) 1098 goto out; 1099 1100 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) 1101 goto out; 1102 1103 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || 1104 !test_bit(p->group - 1, nlk->groups)) 1105 goto out; 1106 1107 sk->sk_err = p->code; 1108 sk->sk_error_report(sk); 1109 out: 1110 return 0; 1111 } 1112 1113 /** 1114 * netlink_set_err - report error to broadcast listeners 1115 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() 1116 * @pid: the PID of a process that we want to skip (if any) 1117 * @groups: the broadcast group that will notice the error 1118 * @code: error code, must be negative (as usual in kernelspace) 1119 */ 1120 void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) 1121 { 1122 struct netlink_set_err_data info; 1123 struct hlist_node *node; 1124 struct sock *sk; 1125 1126 info.exclude_sk = ssk; 1127 info.pid = pid; 1128 info.group = group; 1129 /* sk->sk_err wants a positive error value */ 1130 info.code = -code; 1131 1132 read_lock(&nl_table_lock); 1133 1134 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) 1135 do_one_set_err(sk, &info); 1136 1137 read_unlock(&nl_table_lock); 1138 } 1139 EXPORT_SYMBOL(netlink_set_err); 1140 1141 /* must be called with netlink table grabbed */ 1142 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1143 unsigned int group, 1144 int is_new) 1145 { 1146 int old, new = !!is_new, subscriptions; 1147 1148 old = test_bit(group - 1, nlk->groups); 1149 subscriptions = nlk->subscriptions - old + new; 1150 if (new) 1151 __set_bit(group - 1, nlk->groups); 1152 else 1153 __clear_bit(group - 1, nlk->groups); 1154 netlink_update_subscriptions(&nlk->sk, subscriptions); 1155 netlink_update_listeners(&nlk->sk); 1156 } 1157 1158 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1159 char __user *optval, unsigned int optlen) 1160 { 1161 struct sock *sk = sock->sk; 1162 struct netlink_sock *nlk = nlk_sk(sk); 1163 unsigned int val = 0; 1164 int err; 1165 1166 if (level != SOL_NETLINK) 1167 return -ENOPROTOOPT; 1168 1169 if (optlen >= sizeof(int) && 1170 get_user(val, (unsigned int __user *)optval)) 1171 return -EFAULT; 1172 1173 switch (optname) { 1174 case NETLINK_PKTINFO: 1175 if (val) 1176 nlk->flags |= NETLINK_RECV_PKTINFO; 1177 else 1178 nlk->flags &= ~NETLINK_RECV_PKTINFO; 1179 err = 0; 1180 break; 1181 case NETLINK_ADD_MEMBERSHIP: 1182 case NETLINK_DROP_MEMBERSHIP: { 1183 if (!netlink_capable(sock, NL_NONROOT_RECV)) 1184 return -EPERM; 1185 err = netlink_realloc_groups(sk); 1186 if (err) 1187 return err; 1188 if (!val || val - 1 >= nlk->ngroups) 1189 return -EINVAL; 1190 netlink_table_grab(); 1191 netlink_update_socket_mc(nlk, val, 1192 optname == NETLINK_ADD_MEMBERSHIP); 1193 netlink_table_ungrab(); 1194 err = 0; 1195 break; 1196 } 1197 case NETLINK_BROADCAST_ERROR: 1198 if (val) 1199 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; 1200 else 1201 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; 1202 err = 0; 1203 break; 1204 case NETLINK_NO_ENOBUFS: 1205 if (val) { 1206 nlk->flags |= NETLINK_RECV_NO_ENOBUFS; 1207 clear_bit(0, &nlk->state); 1208 wake_up_interruptible(&nlk->wait); 1209 } else 1210 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; 1211 err = 0; 1212 break; 1213 default: 1214 err = -ENOPROTOOPT; 1215 } 1216 return err; 1217 } 1218 1219 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1220 char __user *optval, int __user *optlen) 1221 { 1222 struct sock *sk = sock->sk; 1223 struct netlink_sock *nlk = nlk_sk(sk); 1224 int len, val, err; 1225 1226 if (level != SOL_NETLINK) 1227 return -ENOPROTOOPT; 1228 1229 if (get_user(len, optlen)) 1230 return -EFAULT; 1231 if (len < 0) 1232 return -EINVAL; 1233 1234 switch (optname) { 1235 case NETLINK_PKTINFO: 1236 if (len < sizeof(int)) 1237 return -EINVAL; 1238 len = sizeof(int); 1239 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; 1240 if (put_user(len, optlen) || 1241 put_user(val, optval)) 1242 return -EFAULT; 1243 err = 0; 1244 break; 1245 case NETLINK_BROADCAST_ERROR: 1246 if (len < sizeof(int)) 1247 return -EINVAL; 1248 len = sizeof(int); 1249 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; 1250 if (put_user(len, optlen) || 1251 put_user(val, optval)) 1252 return -EFAULT; 1253 err = 0; 1254 break; 1255 case NETLINK_NO_ENOBUFS: 1256 if (len < sizeof(int)) 1257 return -EINVAL; 1258 len = sizeof(int); 1259 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; 1260 if (put_user(len, optlen) || 1261 put_user(val, optval)) 1262 return -EFAULT; 1263 err = 0; 1264 break; 1265 default: 1266 err = -ENOPROTOOPT; 1267 } 1268 return err; 1269 } 1270 1271 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1272 { 1273 struct nl_pktinfo info; 1274 1275 info.group = NETLINK_CB(skb).dst_group; 1276 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1277 } 1278 1279 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, 1280 struct msghdr *msg, size_t len) 1281 { 1282 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1283 struct sock *sk = sock->sk; 1284 struct netlink_sock *nlk = nlk_sk(sk); 1285 struct sockaddr_nl *addr = msg->msg_name; 1286 u32 dst_pid; 1287 u32 dst_group; 1288 struct sk_buff *skb; 1289 int err; 1290 struct scm_cookie scm; 1291 1292 if (msg->msg_flags&MSG_OOB) 1293 return -EOPNOTSUPP; 1294 1295 if (NULL == siocb->scm) 1296 siocb->scm = &scm; 1297 err = scm_send(sock, msg, siocb->scm); 1298 if (err < 0) 1299 return err; 1300 1301 if (msg->msg_namelen) { 1302 if (addr->nl_family != AF_NETLINK) 1303 return -EINVAL; 1304 dst_pid = addr->nl_pid; 1305 dst_group = ffs(addr->nl_groups); 1306 if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) 1307 return -EPERM; 1308 } else { 1309 dst_pid = nlk->dst_pid; 1310 dst_group = nlk->dst_group; 1311 } 1312 1313 if (!nlk->pid) { 1314 err = netlink_autobind(sock); 1315 if (err) 1316 goto out; 1317 } 1318 1319 err = -EMSGSIZE; 1320 if (len > sk->sk_sndbuf - 32) 1321 goto out; 1322 err = -ENOBUFS; 1323 skb = alloc_skb(len, GFP_KERNEL); 1324 if (skb == NULL) 1325 goto out; 1326 1327 NETLINK_CB(skb).pid = nlk->pid; 1328 NETLINK_CB(skb).dst_group = dst_group; 1329 NETLINK_CB(skb).loginuid = audit_get_loginuid(current); 1330 NETLINK_CB(skb).sessionid = audit_get_sessionid(current); 1331 security_task_getsecid(current, &(NETLINK_CB(skb).sid)); 1332 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1333 1334 /* What can I do? Netlink is asynchronous, so that 1335 we will have to save current capabilities to 1336 check them, when this message will be delivered 1337 to corresponding kernel module. --ANK (980802) 1338 */ 1339 1340 err = -EFAULT; 1341 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1342 kfree_skb(skb); 1343 goto out; 1344 } 1345 1346 err = security_netlink_send(sk, skb); 1347 if (err) { 1348 kfree_skb(skb); 1349 goto out; 1350 } 1351 1352 if (dst_group) { 1353 atomic_inc(&skb->users); 1354 netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); 1355 } 1356 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); 1357 1358 out: 1359 return err; 1360 } 1361 1362 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, 1363 struct msghdr *msg, size_t len, 1364 int flags) 1365 { 1366 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1367 struct scm_cookie scm; 1368 struct sock *sk = sock->sk; 1369 struct netlink_sock *nlk = nlk_sk(sk); 1370 int noblock = flags&MSG_DONTWAIT; 1371 size_t copied; 1372 struct sk_buff *skb, *frag __maybe_unused = NULL; 1373 int err; 1374 1375 if (flags&MSG_OOB) 1376 return -EOPNOTSUPP; 1377 1378 copied = 0; 1379 1380 skb = skb_recv_datagram(sk, flags, noblock, &err); 1381 if (skb == NULL) 1382 goto out; 1383 1384 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1385 if (unlikely(skb_shinfo(skb)->frag_list)) { 1386 bool need_compat = !!(flags & MSG_CMSG_COMPAT); 1387 1388 /* 1389 * If this skb has a frag_list, then here that means that 1390 * we will have to use the frag_list skb for compat tasks 1391 * and the regular skb for non-compat tasks. 1392 * 1393 * The skb might (and likely will) be cloned, so we can't 1394 * just reset frag_list and go on with things -- we need to 1395 * keep that. For the compat case that's easy -- simply get 1396 * a reference to the compat skb and free the regular one 1397 * including the frag. For the non-compat case, we need to 1398 * avoid sending the frag to the user -- so assign NULL but 1399 * restore it below before freeing the skb. 1400 */ 1401 if (need_compat) { 1402 struct sk_buff *compskb = skb_shinfo(skb)->frag_list; 1403 skb_get(compskb); 1404 kfree_skb(skb); 1405 skb = compskb; 1406 } else { 1407 frag = skb_shinfo(skb)->frag_list; 1408 skb_shinfo(skb)->frag_list = NULL; 1409 } 1410 } 1411 #endif 1412 1413 msg->msg_namelen = 0; 1414 1415 copied = skb->len; 1416 if (len < copied) { 1417 msg->msg_flags |= MSG_TRUNC; 1418 copied = len; 1419 } 1420 1421 skb_reset_transport_header(skb); 1422 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1423 1424 if (msg->msg_name) { 1425 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; 1426 addr->nl_family = AF_NETLINK; 1427 addr->nl_pad = 0; 1428 addr->nl_pid = NETLINK_CB(skb).pid; 1429 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1430 msg->msg_namelen = sizeof(*addr); 1431 } 1432 1433 if (nlk->flags & NETLINK_RECV_PKTINFO) 1434 netlink_cmsg_recv_pktinfo(msg, skb); 1435 1436 if (NULL == siocb->scm) { 1437 memset(&scm, 0, sizeof(scm)); 1438 siocb->scm = &scm; 1439 } 1440 siocb->scm->creds = *NETLINK_CREDS(skb); 1441 if (flags & MSG_TRUNC) 1442 copied = skb->len; 1443 1444 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1445 skb_shinfo(skb)->frag_list = frag; 1446 #endif 1447 1448 skb_free_datagram(sk, skb); 1449 1450 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) 1451 netlink_dump(sk); 1452 1453 scm_recv(sock, msg, siocb->scm, flags); 1454 out: 1455 netlink_rcv_wake(sk); 1456 return err ? : copied; 1457 } 1458 1459 static void netlink_data_ready(struct sock *sk, int len) 1460 { 1461 BUG(); 1462 } 1463 1464 /* 1465 * We export these functions to other modules. They provide a 1466 * complete set of kernel non-blocking support for message 1467 * queueing. 1468 */ 1469 1470 struct sock * 1471 netlink_kernel_create(struct net *net, int unit, unsigned int groups, 1472 void (*input)(struct sk_buff *skb), 1473 struct mutex *cb_mutex, struct module *module) 1474 { 1475 struct socket *sock; 1476 struct sock *sk; 1477 struct netlink_sock *nlk; 1478 unsigned long *listeners = NULL; 1479 1480 BUG_ON(!nl_table); 1481 1482 if (unit < 0 || unit >= MAX_LINKS) 1483 return NULL; 1484 1485 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1486 return NULL; 1487 1488 /* 1489 * We have to just have a reference on the net from sk, but don't 1490 * get_net it. Besides, we cannot get and then put the net here. 1491 * So we create one inside init_net and the move it to net. 1492 */ 1493 1494 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) 1495 goto out_sock_release_nosk; 1496 1497 sk = sock->sk; 1498 sk_change_net(sk, net); 1499 1500 if (groups < 32) 1501 groups = 32; 1502 1503 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 1504 GFP_KERNEL); 1505 if (!listeners) 1506 goto out_sock_release; 1507 1508 sk->sk_data_ready = netlink_data_ready; 1509 if (input) 1510 nlk_sk(sk)->netlink_rcv = input; 1511 1512 if (netlink_insert(sk, net, 0)) 1513 goto out_sock_release; 1514 1515 nlk = nlk_sk(sk); 1516 nlk->flags |= NETLINK_KERNEL_SOCKET; 1517 1518 netlink_table_grab(); 1519 if (!nl_table[unit].registered) { 1520 nl_table[unit].groups = groups; 1521 nl_table[unit].listeners = listeners; 1522 nl_table[unit].cb_mutex = cb_mutex; 1523 nl_table[unit].module = module; 1524 nl_table[unit].registered = 1; 1525 } else { 1526 kfree(listeners); 1527 nl_table[unit].registered++; 1528 } 1529 netlink_table_ungrab(); 1530 return sk; 1531 1532 out_sock_release: 1533 kfree(listeners); 1534 netlink_kernel_release(sk); 1535 return NULL; 1536 1537 out_sock_release_nosk: 1538 sock_release(sock); 1539 return NULL; 1540 } 1541 EXPORT_SYMBOL(netlink_kernel_create); 1542 1543 1544 void 1545 netlink_kernel_release(struct sock *sk) 1546 { 1547 sk_release_kernel(sk); 1548 } 1549 EXPORT_SYMBOL(netlink_kernel_release); 1550 1551 1552 static void netlink_free_old_listeners(struct rcu_head *rcu_head) 1553 { 1554 struct listeners_rcu_head *lrh; 1555 1556 lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head); 1557 kfree(lrh->ptr); 1558 } 1559 1560 int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1561 { 1562 unsigned long *listeners, *old = NULL; 1563 struct listeners_rcu_head *old_rcu_head; 1564 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1565 1566 if (groups < 32) 1567 groups = 32; 1568 1569 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1570 listeners = kzalloc(NLGRPSZ(groups) + 1571 sizeof(struct listeners_rcu_head), 1572 GFP_ATOMIC); 1573 if (!listeners) 1574 return -ENOMEM; 1575 old = tbl->listeners; 1576 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1577 rcu_assign_pointer(tbl->listeners, listeners); 1578 /* 1579 * Free the old memory after an RCU grace period so we 1580 * don't leak it. We use call_rcu() here in order to be 1581 * able to call this function from atomic contexts. The 1582 * allocation of this memory will have reserved enough 1583 * space for struct listeners_rcu_head at the end. 1584 */ 1585 old_rcu_head = (void *)(tbl->listeners + 1586 NLGRPLONGS(tbl->groups)); 1587 old_rcu_head->ptr = old; 1588 call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners); 1589 } 1590 tbl->groups = groups; 1591 1592 return 0; 1593 } 1594 1595 /** 1596 * netlink_change_ngroups - change number of multicast groups 1597 * 1598 * This changes the number of multicast groups that are available 1599 * on a certain netlink family. Note that it is not possible to 1600 * change the number of groups to below 32. Also note that it does 1601 * not implicitly call netlink_clear_multicast_users() when the 1602 * number of groups is reduced. 1603 * 1604 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 1605 * @groups: The new number of groups. 1606 */ 1607 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 1608 { 1609 int err; 1610 1611 netlink_table_grab(); 1612 err = __netlink_change_ngroups(sk, groups); 1613 netlink_table_ungrab(); 1614 1615 return err; 1616 } 1617 1618 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 1619 { 1620 struct sock *sk; 1621 struct hlist_node *node; 1622 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 1623 1624 sk_for_each_bound(sk, node, &tbl->mc_list) 1625 netlink_update_socket_mc(nlk_sk(sk), group, 0); 1626 } 1627 1628 /** 1629 * netlink_clear_multicast_users - kick off multicast listeners 1630 * 1631 * This function removes all listeners from the given group. 1632 * @ksk: The kernel netlink socket, as returned by 1633 * netlink_kernel_create(). 1634 * @group: The multicast group to clear. 1635 */ 1636 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 1637 { 1638 netlink_table_grab(); 1639 __netlink_clear_multicast_users(ksk, group); 1640 netlink_table_ungrab(); 1641 } 1642 1643 void netlink_set_nonroot(int protocol, unsigned int flags) 1644 { 1645 if ((unsigned int)protocol < MAX_LINKS) 1646 nl_table[protocol].nl_nonroot = flags; 1647 } 1648 EXPORT_SYMBOL(netlink_set_nonroot); 1649 1650 static void netlink_destroy_callback(struct netlink_callback *cb) 1651 { 1652 kfree_skb(cb->skb); 1653 kfree(cb); 1654 } 1655 1656 /* 1657 * It looks a bit ugly. 1658 * It would be better to create kernel thread. 1659 */ 1660 1661 static int netlink_dump(struct sock *sk) 1662 { 1663 struct netlink_sock *nlk = nlk_sk(sk); 1664 struct netlink_callback *cb; 1665 struct sk_buff *skb; 1666 struct nlmsghdr *nlh; 1667 int len, err = -ENOBUFS; 1668 1669 skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); 1670 if (!skb) 1671 goto errout; 1672 1673 mutex_lock(nlk->cb_mutex); 1674 1675 cb = nlk->cb; 1676 if (cb == NULL) { 1677 err = -EINVAL; 1678 goto errout_skb; 1679 } 1680 1681 len = cb->dump(skb, cb); 1682 1683 if (len > 0) { 1684 mutex_unlock(nlk->cb_mutex); 1685 1686 if (sk_filter(sk, skb)) 1687 kfree_skb(skb); 1688 else { 1689 skb_queue_tail(&sk->sk_receive_queue, skb); 1690 sk->sk_data_ready(sk, skb->len); 1691 } 1692 return 0; 1693 } 1694 1695 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 1696 if (!nlh) 1697 goto errout_skb; 1698 1699 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 1700 1701 if (sk_filter(sk, skb)) 1702 kfree_skb(skb); 1703 else { 1704 skb_queue_tail(&sk->sk_receive_queue, skb); 1705 sk->sk_data_ready(sk, skb->len); 1706 } 1707 1708 if (cb->done) 1709 cb->done(cb); 1710 nlk->cb = NULL; 1711 mutex_unlock(nlk->cb_mutex); 1712 1713 netlink_destroy_callback(cb); 1714 return 0; 1715 1716 errout_skb: 1717 mutex_unlock(nlk->cb_mutex); 1718 kfree_skb(skb); 1719 errout: 1720 return err; 1721 } 1722 1723 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1724 const struct nlmsghdr *nlh, 1725 int (*dump)(struct sk_buff *skb, 1726 struct netlink_callback *), 1727 int (*done)(struct netlink_callback *)) 1728 { 1729 struct netlink_callback *cb; 1730 struct sock *sk; 1731 struct netlink_sock *nlk; 1732 1733 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1734 if (cb == NULL) 1735 return -ENOBUFS; 1736 1737 cb->dump = dump; 1738 cb->done = done; 1739 cb->nlh = nlh; 1740 atomic_inc(&skb->users); 1741 cb->skb = skb; 1742 1743 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); 1744 if (sk == NULL) { 1745 netlink_destroy_callback(cb); 1746 return -ECONNREFUSED; 1747 } 1748 nlk = nlk_sk(sk); 1749 /* A dump is in progress... */ 1750 mutex_lock(nlk->cb_mutex); 1751 if (nlk->cb) { 1752 mutex_unlock(nlk->cb_mutex); 1753 netlink_destroy_callback(cb); 1754 sock_put(sk); 1755 return -EBUSY; 1756 } 1757 nlk->cb = cb; 1758 mutex_unlock(nlk->cb_mutex); 1759 1760 netlink_dump(sk); 1761 sock_put(sk); 1762 1763 /* We successfully started a dump, by returning -EINTR we 1764 * signal not to send ACK even if it was requested. 1765 */ 1766 return -EINTR; 1767 } 1768 EXPORT_SYMBOL(netlink_dump_start); 1769 1770 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1771 { 1772 struct sk_buff *skb; 1773 struct nlmsghdr *rep; 1774 struct nlmsgerr *errmsg; 1775 size_t payload = sizeof(*errmsg); 1776 1777 /* error messages get the original request appened */ 1778 if (err) 1779 payload += nlmsg_len(nlh); 1780 1781 skb = nlmsg_new(payload, GFP_KERNEL); 1782 if (!skb) { 1783 struct sock *sk; 1784 1785 sk = netlink_lookup(sock_net(in_skb->sk), 1786 in_skb->sk->sk_protocol, 1787 NETLINK_CB(in_skb).pid); 1788 if (sk) { 1789 sk->sk_err = ENOBUFS; 1790 sk->sk_error_report(sk); 1791 sock_put(sk); 1792 } 1793 return; 1794 } 1795 1796 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1797 NLMSG_ERROR, payload, 0); 1798 errmsg = nlmsg_data(rep); 1799 errmsg->error = err; 1800 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); 1801 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1802 } 1803 EXPORT_SYMBOL(netlink_ack); 1804 1805 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1806 struct nlmsghdr *)) 1807 { 1808 struct nlmsghdr *nlh; 1809 int err; 1810 1811 while (skb->len >= nlmsg_total_size(0)) { 1812 int msglen; 1813 1814 nlh = nlmsg_hdr(skb); 1815 err = 0; 1816 1817 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 1818 return 0; 1819 1820 /* Only requests are handled by the kernel */ 1821 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 1822 goto ack; 1823 1824 /* Skip control messages */ 1825 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 1826 goto ack; 1827 1828 err = cb(skb, nlh); 1829 if (err == -EINTR) 1830 goto skip; 1831 1832 ack: 1833 if (nlh->nlmsg_flags & NLM_F_ACK || err) 1834 netlink_ack(skb, nlh, err); 1835 1836 skip: 1837 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1838 if (msglen > skb->len) 1839 msglen = skb->len; 1840 skb_pull(skb, msglen); 1841 } 1842 1843 return 0; 1844 } 1845 EXPORT_SYMBOL(netlink_rcv_skb); 1846 1847 /** 1848 * nlmsg_notify - send a notification netlink message 1849 * @sk: netlink socket to use 1850 * @skb: notification message 1851 * @pid: destination netlink pid for reports or 0 1852 * @group: destination multicast group or 0 1853 * @report: 1 to report back, 0 to disable 1854 * @flags: allocation flags 1855 */ 1856 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, 1857 unsigned int group, int report, gfp_t flags) 1858 { 1859 int err = 0; 1860 1861 if (group) { 1862 int exclude_pid = 0; 1863 1864 if (report) { 1865 atomic_inc(&skb->users); 1866 exclude_pid = pid; 1867 } 1868 1869 /* errors reported via destination sk->sk_err, but propagate 1870 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ 1871 err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); 1872 } 1873 1874 if (report) { 1875 int err2; 1876 1877 err2 = nlmsg_unicast(sk, skb, pid); 1878 if (!err || err == -ESRCH) 1879 err = err2; 1880 } 1881 1882 return err; 1883 } 1884 EXPORT_SYMBOL(nlmsg_notify); 1885 1886 #ifdef CONFIG_PROC_FS 1887 struct nl_seq_iter { 1888 struct seq_net_private p; 1889 int link; 1890 int hash_idx; 1891 }; 1892 1893 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) 1894 { 1895 struct nl_seq_iter *iter = seq->private; 1896 int i, j; 1897 struct sock *s; 1898 struct hlist_node *node; 1899 loff_t off = 0; 1900 1901 for (i = 0; i < MAX_LINKS; i++) { 1902 struct nl_pid_hash *hash = &nl_table[i].hash; 1903 1904 for (j = 0; j <= hash->mask; j++) { 1905 sk_for_each(s, node, &hash->table[j]) { 1906 if (sock_net(s) != seq_file_net(seq)) 1907 continue; 1908 if (off == pos) { 1909 iter->link = i; 1910 iter->hash_idx = j; 1911 return s; 1912 } 1913 ++off; 1914 } 1915 } 1916 } 1917 return NULL; 1918 } 1919 1920 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) 1921 __acquires(nl_table_lock) 1922 { 1923 read_lock(&nl_table_lock); 1924 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; 1925 } 1926 1927 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1928 { 1929 struct sock *s; 1930 struct nl_seq_iter *iter; 1931 int i, j; 1932 1933 ++*pos; 1934 1935 if (v == SEQ_START_TOKEN) 1936 return netlink_seq_socket_idx(seq, 0); 1937 1938 iter = seq->private; 1939 s = v; 1940 do { 1941 s = sk_next(s); 1942 } while (s && sock_net(s) != seq_file_net(seq)); 1943 if (s) 1944 return s; 1945 1946 i = iter->link; 1947 j = iter->hash_idx + 1; 1948 1949 do { 1950 struct nl_pid_hash *hash = &nl_table[i].hash; 1951 1952 for (; j <= hash->mask; j++) { 1953 s = sk_head(&hash->table[j]); 1954 while (s && sock_net(s) != seq_file_net(seq)) 1955 s = sk_next(s); 1956 if (s) { 1957 iter->link = i; 1958 iter->hash_idx = j; 1959 return s; 1960 } 1961 } 1962 1963 j = 0; 1964 } while (++i < MAX_LINKS); 1965 1966 return NULL; 1967 } 1968 1969 static void netlink_seq_stop(struct seq_file *seq, void *v) 1970 __releases(nl_table_lock) 1971 { 1972 read_unlock(&nl_table_lock); 1973 } 1974 1975 1976 static int netlink_seq_show(struct seq_file *seq, void *v) 1977 { 1978 if (v == SEQ_START_TOKEN) 1979 seq_puts(seq, 1980 "sk Eth Pid Groups " 1981 "Rmem Wmem Dump Locks Drops\n"); 1982 else { 1983 struct sock *s = v; 1984 struct netlink_sock *nlk = nlk_sk(s); 1985 1986 seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n", 1987 s, 1988 s->sk_protocol, 1989 nlk->pid, 1990 nlk->groups ? (u32)nlk->groups[0] : 0, 1991 sk_rmem_alloc_get(s), 1992 sk_wmem_alloc_get(s), 1993 nlk->cb, 1994 atomic_read(&s->sk_refcnt), 1995 atomic_read(&s->sk_drops) 1996 ); 1997 1998 } 1999 return 0; 2000 } 2001 2002 static const struct seq_operations netlink_seq_ops = { 2003 .start = netlink_seq_start, 2004 .next = netlink_seq_next, 2005 .stop = netlink_seq_stop, 2006 .show = netlink_seq_show, 2007 }; 2008 2009 2010 static int netlink_seq_open(struct inode *inode, struct file *file) 2011 { 2012 return seq_open_net(inode, file, &netlink_seq_ops, 2013 sizeof(struct nl_seq_iter)); 2014 } 2015 2016 static const struct file_operations netlink_seq_fops = { 2017 .owner = THIS_MODULE, 2018 .open = netlink_seq_open, 2019 .read = seq_read, 2020 .llseek = seq_lseek, 2021 .release = seq_release_net, 2022 }; 2023 2024 #endif 2025 2026 int netlink_register_notifier(struct notifier_block *nb) 2027 { 2028 return atomic_notifier_chain_register(&netlink_chain, nb); 2029 } 2030 EXPORT_SYMBOL(netlink_register_notifier); 2031 2032 int netlink_unregister_notifier(struct notifier_block *nb) 2033 { 2034 return atomic_notifier_chain_unregister(&netlink_chain, nb); 2035 } 2036 EXPORT_SYMBOL(netlink_unregister_notifier); 2037 2038 static const struct proto_ops netlink_ops = { 2039 .family = PF_NETLINK, 2040 .owner = THIS_MODULE, 2041 .release = netlink_release, 2042 .bind = netlink_bind, 2043 .connect = netlink_connect, 2044 .socketpair = sock_no_socketpair, 2045 .accept = sock_no_accept, 2046 .getname = netlink_getname, 2047 .poll = datagram_poll, 2048 .ioctl = sock_no_ioctl, 2049 .listen = sock_no_listen, 2050 .shutdown = sock_no_shutdown, 2051 .setsockopt = netlink_setsockopt, 2052 .getsockopt = netlink_getsockopt, 2053 .sendmsg = netlink_sendmsg, 2054 .recvmsg = netlink_recvmsg, 2055 .mmap = sock_no_mmap, 2056 .sendpage = sock_no_sendpage, 2057 }; 2058 2059 static const struct net_proto_family netlink_family_ops = { 2060 .family = PF_NETLINK, 2061 .create = netlink_create, 2062 .owner = THIS_MODULE, /* for consistency 8) */ 2063 }; 2064 2065 static int __net_init netlink_net_init(struct net *net) 2066 { 2067 #ifdef CONFIG_PROC_FS 2068 if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) 2069 return -ENOMEM; 2070 #endif 2071 return 0; 2072 } 2073 2074 static void __net_exit netlink_net_exit(struct net *net) 2075 { 2076 #ifdef CONFIG_PROC_FS 2077 proc_net_remove(net, "netlink"); 2078 #endif 2079 } 2080 2081 static struct pernet_operations __net_initdata netlink_net_ops = { 2082 .init = netlink_net_init, 2083 .exit = netlink_net_exit, 2084 }; 2085 2086 static int __init netlink_proto_init(void) 2087 { 2088 struct sk_buff *dummy_skb; 2089 int i; 2090 unsigned long limit; 2091 unsigned int order; 2092 int err = proto_register(&netlink_proto, 0); 2093 2094 if (err != 0) 2095 goto out; 2096 2097 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); 2098 2099 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 2100 if (!nl_table) 2101 goto panic; 2102 2103 if (totalram_pages >= (128 * 1024)) 2104 limit = totalram_pages >> (21 - PAGE_SHIFT); 2105 else 2106 limit = totalram_pages >> (23 - PAGE_SHIFT); 2107 2108 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; 2109 limit = (1UL << order) / sizeof(struct hlist_head); 2110 order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; 2111 2112 for (i = 0; i < MAX_LINKS; i++) { 2113 struct nl_pid_hash *hash = &nl_table[i].hash; 2114 2115 hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); 2116 if (!hash->table) { 2117 while (i-- > 0) 2118 nl_pid_hash_free(nl_table[i].hash.table, 2119 1 * sizeof(*hash->table)); 2120 kfree(nl_table); 2121 goto panic; 2122 } 2123 hash->max_shift = order; 2124 hash->shift = 0; 2125 hash->mask = 0; 2126 hash->rehash_time = jiffies; 2127 } 2128 2129 sock_register(&netlink_family_ops); 2130 register_pernet_subsys(&netlink_net_ops); 2131 /* The netlink device handler may be needed early. */ 2132 rtnetlink_init(); 2133 out: 2134 return err; 2135 panic: 2136 panic("netlink_init: Cannot allocate nl_table\n"); 2137 } 2138 2139 core_initcall(netlink_proto_init); 2140