/*
 * NETLINK	Kernel-user communication protocol.
 *
 *		Authors:	Alan Cox <alan@redhat.com>
 *				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *				 added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *				 use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *				 - inc module use count of module that owns
 *				   the kernel socket in case userspace opens
 *				   socket of same protocol
 *				 - remove all module support, since netlink is
 *				   mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/selinux.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))

struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock		sk;
	u32			pid;
	u32			dst_pid;
	u32			dst_group;
	u32			flags;
	u32			subscriptions;
	u32			ngroups;
	unsigned long		*groups;
	unsigned long		state;
	wait_queue_head_t	wait;
	struct netlink_callback	*cb;
	struct mutex		*cb_mutex;
	struct mutex		cb_def_mutex;
	void			(*netlink_rcv)(struct sk_buff *skb);
	struct module		*module;
};

#define NETLINK_KERNEL_SOCKET	0x1
#define NETLINK_RECV_PKTINFO	0x2

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
	struct hlist_head *table;
	unsigned long rehash_time;

	unsigned int mask;
	unsigned int shift;

	unsigned int entries;
	unsigned int max_shift;

	u32 rnd;
};

struct netlink_table {
	struct nl_pid_hash hash;
	struct hlist_head mc_list;
	unsigned long *listeners;
	unsigned int nl_nonroot;
	unsigned int groups;
	struct mutex *cb_mutex;
	struct module *module;
	int registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static u32 netlink_group_mask(u32 group)
{
	return group ? 1 << (group - 1) : 0;
}

static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}
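/*
 * Worked example for the helpers and macros above: multicast group numbering
 * is 1-based, so netlink_group_mask(1) is 0x1 and netlink_group_mask(3) is
 * 0x4.  Group bitmaps are sized in whole longs, e.g. with 32 groups
 * NLGRPSZ(32) is 4 bytes on 32-bit and 8 bytes on 64-bit, and NLGRPLONGS(32)
 * is 1 in both cases.
 */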
static void netlink_sock_destruct(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->cb) {
		if (nlk->cb->done)
			nlk->cb->done(nlk->cb);
		netlink_destroy_callback(nlk->cb);
	}

	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
		return;
	}
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
	BUG_TRAP(!nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

static void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

static void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}

static inline struct sock *netlink_lookup(struct net *net, int protocol,
					  u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[protocol].hash;
	struct hlist_head *head;
	struct sock *sk;
	struct hlist_node *node;

	read_lock(&nl_table_lock);
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(sk, node, head) {
		if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) {
			sock_hold(sk);
			goto found;
		}
	}
	sk = NULL;
found:
	read_unlock(&nl_table_lock);
	return sk;
}

static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_ATOMIC);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					 get_order(size));
}

static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(table);
	else
		free_pages((unsigned long)table, get_order(size));
}
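/*
 * Sizing example for the pid hash handled below: the table grows (shift + 1,
 * mask doubled) once the average chain length, hash->entries >> hash->shift,
 * exceeds one entry per bucket -- e.g. 17 sockets in an 8-bucket table give
 * an average of 2, so nl_pid_hash_dilute() rehashes into 16 buckets.  A
 * non-growing rehash (same size, fresh random seed) is also done, at most
 * once every ten minutes, when one chain gets longer than that average.
 */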
static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
	unsigned int omask, mask, shift;
	size_t osize, size;
	struct hlist_head *otable, *table;
	int i;

	omask = mask = hash->mask;
	osize = size = (mask + 1) * sizeof(*table);
	shift = hash->shift;

	if (grow) {
		if (++shift > hash->max_shift)
			return 0;
		mask = mask * 2 + 1;
		size *= 2;
	}

	table = nl_pid_hash_zalloc(size);
	if (!table)
		return 0;

	otable = hash->table;
	hash->table = table;
	hash->mask = mask;
	hash->shift = shift;
	get_random_bytes(&hash->rnd, sizeof(hash->rnd));

	for (i = 0; i <= omask; i++) {
		struct sock *sk;
		struct hlist_node *node, *tmp;

		sk_for_each_safe(sk, node, tmp, &otable[i])
			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
	}

	nl_pid_hash_free(otable, osize);
	hash->rehash_time = jiffies + 10 * 60 * HZ;
	return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
	int avg = hash->entries >> hash->shift;

	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
		return 1;

	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
		nl_pid_hash_rehash(hash, 0);
		return 1;
	}

	return 0;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	struct hlist_node *node;
	unsigned long mask;
	unsigned int i;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, node, &tbl->mc_list) {
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		tbl->listeners[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}
static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	int err = -EADDRINUSE;
	struct sock *osk;
	struct hlist_node *node;
	int len;

	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	len = 0;
	sk_for_each(osk, node, head) {
		if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid))
			break;
		len++;
	}
	if (node)
		goto err;

	err = -EBUSY;
	if (nlk_sk(sk)->pid)
		goto err;

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
		goto err;

	if (len && nl_pid_hash_dilute(hash, len))
		head = nl_pid_hashfn(hash, pid);
	hash->entries++;
	nlk_sk(sk)->pid = pid;
	sk_add_node(sk, head);
	err = 0;

err:
	netlink_table_ungrab();
	return err;
}

static void netlink_remove(struct sock *sk)
{
	netlink_table_grab();
	if (sk_del_node_init(sk))
		nl_table[sk->sk_protocol].hash.entries--;
	if (nlk_sk(sk)->subscriptions)
		__sk_del_bind_node(sk);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex)
		nlk->cb_mutex = cb_mutex;
	else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	netlink_lock_table();
#ifdef CONFIG_KMOD
	if (!nl_table[protocol].registered) {
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	cb_mutex = nl_table[protocol].cb_mutex;
	netlink_unlock_table();

	err = __netlink_create(net, sock, cb_mutex, protocol);
	if (err < 0)
		goto out_module;

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}
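/*
 * The request_module() string above follows the "net-pf-<family>-proto-<n>"
 * convention, so a module implementing, say, hypothetical protocol N would
 * typically advertise itself with an alias along the lines of
 *
 *	MODULE_ALIAS("net-pf-16-proto-N");	/- PF_NETLINK == 16 -/
 *
 * (illustrative alias only) and get loaded on the first socket() call for
 * that protocol.
 */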
static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */

	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);

	skb_queue_purge(&sk->sk_write_queue);

	if (nlk->pid && !nlk->subscriptions) {
		struct netlink_notify n = {
			.net = sk->sk_net,
			.protocol = sk->sk_protocol,
			.pid = nlk->pid,
		};
		atomic_notifier_call_chain(&netlink_chain,
					   NETLINK_URELEASE, &n);
	}

	module_put(nlk->module);

	netlink_table_grab();
	if (netlink_is_kernel(sk)) {
		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
		if (--nl_table[sk->sk_protocol].registered == 0) {
			kfree(nl_table[sk->sk_protocol].listeners);
			nl_table[sk->sk_protocol].module = NULL;
			nl_table[sk->sk_protocol].registered = 0;
		}
	} else if (nlk->subscriptions)
		netlink_update_listeners(sk);
	netlink_table_ungrab();

	kfree(nlk->groups);
	nlk->groups = NULL;

	sock_put(sk);
	return 0;
}

static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sk->sk_net;
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	struct sock *osk;
	struct hlist_node *node;
	s32 pid = current->tgid;
	int err;
	static s32 rover = -4097;

retry:
	cond_resched();
	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(osk, node, head) {
		if ((osk->sk_net != net))
			continue;
		if (nlk_sk(osk)->pid == pid) {
			/* Bind collision, search negative pid values. */
			pid = rover--;
			if (rover > -4097)
				rover = -4097;
			netlink_table_ungrab();
			goto retry;
		}
	}
	netlink_table_ungrab();

	err = netlink_insert(sk, net, pid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine.  */
	if (err == -EBUSY)
		err = 0;

	return err;
}
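/*
 * Autobind example: the first netlink socket a process binds implicitly gets
 * pid == current->tgid.  A second unbound socket in the same process collides
 * with that pid and falls back to the shared rover, ending up with a negative
 * pid such as -4097, -4098, and so on.
 */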
static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
	       capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
out_unlock:
	netlink_table_ungrab();
	return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sk->sk_net;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen to multicasts */
	if (nladdr->nl_groups) {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	if (nlk->pid) {
		if (nladdr->nl_pid != nlk->pid)
			return -EINVAL;
	} else {
		err = nladdr->nl_pid ?
			netlink_insert(sk, net, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(nladdr->nl_groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;
}
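/*
 * From userspace this bind path is typically reached with something like
 * (illustrative only, GROUP is a placeholder):
 *
 *	struct sockaddr_nl snl = {
 *		.nl_family = AF_NETLINK,
 *		.nl_pid    = 0,			/- 0: let the kernel autobind -/
 *		.nl_groups = 1 << (GROUP - 1),	/- groups 1..32 only	      -/
 *	};
 *	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
 *
 * Groups above 32 do not fit in nl_groups and must be joined through the
 * NETLINK_ADD_MEMBERSHIP socket option handled further below.
 */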
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state = NETLINK_UNCONNECTED;
		nlk->dst_pid = 0;
		nlk->dst_group = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
		return -EPERM;

	if (!nlk->pid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state = NETLINK_CONNECTED;
		nlk->dst_pid = nladdr->nl_pid;
		nlk->dst_group = ffs(nladdr->nl_groups);
	}

	return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_pid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->pid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}

static void netlink_overrun(struct sock *sk)
{
	if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
		sk->sk_err = ENOBUFS;
		sk->sk_error_report(sk);
	}
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_pid != nlk_sk(ssk)->pid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}
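/*
 * netlink_getsockbyfilp() and the attach/send/detach helpers below are
 * exported for callers outside this file (the POSIX message queue
 * notification code, for example).  A typical caller, sketched here for
 * illustration only, looks roughly like:
 *
 *	sock = netlink_getsockbyfilp(filp);
 *	...
 *	ret = netlink_attachskb(sock, skb, nonblock, &timeo, NULL);
 *	if (ret == 1)
 *		goto retry_lookup;	/- reference dropped, look up again -/
 *	if (ret == 0)
 *		netlink_sendskb(sock, skb);
 *
 * with netlink_detachskb() used when the caller decides not to send after a
 * successful attach.
 */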
/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
		      long *timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    test_bit(0, &nlk->state)) {
		DECLARE_WAITQUEUE(wait, current);
		if (!*timeo) {
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(0, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			*timeo = schedule_timeout(*timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(*timeo);
		}
		return 1;
	}
	skb_set_owner_r(skb, sk);
	return 0;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, len);
	sock_put(sk);
	return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}

static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
					   gfp_t allocation)
{
	int delta;

	skb_orphan(skb);

	delta = skb->end - skb->tail;
	if (delta * 2 < skb->truesize)
		return skb;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		kfree_skb(skb);
		skb = nskb;
	}

	if (!pskb_expand_head(skb, 0, -delta, allocation))
		skb->truesize -= delta;

	return skb;
}

static inline void netlink_rcv_wake(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (skb_queue_empty(&sk->sk_receive_queue))
		clear_bit(0, &nlk->state);
	if (!test_bit(0, &nlk->state))
		wake_up_interruptible(&nlk->wait);
}

static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		skb_set_owner_r(skb, sk);
		nlk->netlink_rcv(skb);
	}
	kfree_skb(skb);
	sock_put(sk);
	return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 pid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbypid(ssk, pid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb);

	err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	unsigned long *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	if (group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
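/*
 * netlink_has_listeners() lets a protocol skip building a notification
 * altogether when nobody is subscribed, e.g. (illustrative pattern,
 * kernel_sk/MY_GRP are placeholders):
 *
 *	if (netlink_has_listeners(kernel_sk, MY_GRP))
 *		netlink_broadcast(kernel_sk, skb, 0, MY_GRP, GFP_KERNEL);
 *
 * where kernel_sk is the socket returned by netlink_kernel_create().
 */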
static inline int netlink_broadcast_deliver(struct sock *sk,
					    struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(0, &nlk->state)) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, skb->len);
		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
	}
	return -1;
}

struct netlink_broadcast_data {
	struct sock *exclude_sk;
	struct net *net;
	u32 pid;
	u32 group;
	int failure;
	int congested;
	int delivered;
	gfp_t allocation;
	struct sk_buff *skb, *skb2;
};

static inline int do_one_broadcast(struct sock *sk,
				   struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if ((sk->sk_net != p->net))
		goto out;

	if (p->failure) {
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		netlink_overrun(sk);
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
	sock_put(sk);

out:
	return 0;
}

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		      u32 group, gfp_t allocation)
{
	struct net *net = ssk->sk_net;
	struct netlink_broadcast_data info;
	struct hlist_node *node;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.pid = pid;
	info.group = group;
	info.failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow the socket list to change */

	netlink_lock_table();

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	kfree_skb(skb);

	netlink_unlock_table();

	if (info.skb2)
		kfree_skb(info.skb2);

	if (info.delivered) {
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	if (info.failure)
		return -ENOBUFS;
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast);
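/*
 * netlink_broadcast() returns 0 when the message reached at least one
 * listener, -ESRCH when nothing was delivered, and -ENOBUFS when the clone
 * failed and every listener was flagged with an overrun.  A typical
 * kernel-side call (illustrative only) is
 *
 *	netlink_broadcast(kernel_sk, skb, 0, MY_GRP, GFP_KERNEL);
 *
 * where the pid argument (here 0) names a single pid to leave out, normally
 * the requester that triggered the notification.
 */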
struct netlink_set_err_data {
	struct sock *exclude_sk;
	u32 pid;
	u32 group;
	int code;
};

static inline int do_one_set_err(struct sock *sk,
				 struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (sk == p->exclude_sk)
		goto out;

	if (sk->sk_net != p->exclude_sk->sk_net)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	sk->sk_err = p->code;
	sk->sk_error_report(sk);
out:
	return 0;
}

void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct hlist_node *node;
	struct sock *sk;

	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	info.code = code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
}

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
				     unsigned int group,
				     int is_new)
{
	int old, new = !!is_new, subscriptions;

	old = test_bit(group - 1, nlk->groups);
	subscriptions = nlk->subscriptions - old + new;
	if (new)
		__set_bit(group - 1, nlk->groups);
	else
		__clear_bit(group - 1, nlk->groups);
	netlink_update_subscriptions(&nlk->sk, subscriptions);
	netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int val = 0;
	int err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optlen >= sizeof(int) &&
	    get_user(val, (unsigned int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		netlink_update_socket_mc(nlk, val,
					 optname == NETLINK_ADD_MEMBERSHIP);
		netlink_table_ungrab();
		err = 0;
		break;
	}
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}
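/*
 * Userspace counterpart of the membership options above, for illustration
 * (GROUP_NUMBER is a placeholder):
 *
 *	int grp = GROUP_NUMBER;		/- 1-based group, may be > 32 -/
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 *
 * Unlike nl_groups in bind(), this works for any group the protocol has
 * registered, not just the first 32.
 */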
static int netlink_getsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int len, val, err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct nl_pktinfo info;

	info.group = NETLINK_CB(skb).dst_group;
	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
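/*
 * With NETLINK_PKTINFO enabled, a receiver can tell which multicast group a
 * message was sent to by walking the control messages, roughly (illustrative
 * userspace sketch):
 *
 *	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
 *		if (cm->cmsg_level == SOL_NETLINK &&
 *		    cm->cmsg_type == NETLINK_PKTINFO)
 *			group = ((struct nl_pktinfo *)CMSG_DATA(cm))->group;
 */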
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *addr = msg->msg_name;
	u32 dst_pid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	struct scm_cookie scm;

	if (msg->msg_flags&MSG_OOB)
		return -EOPNOTSUPP;

	if (NULL == siocb->scm)
		siocb->scm = &scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	if (msg->msg_namelen) {
		if (addr->nl_family != AF_NETLINK)
			return -EINVAL;
		dst_pid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
			return -EPERM;
	} else {
		dst_pid = nlk->dst_pid;
		dst_group = nlk->dst_group;
	}

	if (!nlk->pid) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	skb = alloc_skb(len, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	NETLINK_CB(skb).pid = nlk->pid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
	selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

	/* What can I do? Netlink is asynchronous, so we have to save the
	 * current capabilities here and check them when this message is
	 * delivered to the corresponding kernel module.
	 *						--ANK (980802)
	 */

	err = -EFAULT;
	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
		kfree_skb(skb);
		goto out;
	}

	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	if (dst_group) {
		atomic_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
	}
	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
	return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len,
			   int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags&MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb;
	int err;

	if (flags&MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb_reset_transport_header(skb);
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pad = 0;
		addr->nl_pid = NETLINK_CB(skb).pid;
		addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);

	if (NULL == siocb->scm) {
		memset(&scm, 0, sizeof(scm));
		siocb->scm = &scm;
	}
	siocb->scm->creds = *NETLINK_CREDS(skb);
	if (flags & MSG_TRUNC)
		copied = skb->len;
	skb_free_datagram(sk, skb);

	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
		netlink_dump(sk);

	scm_recv(sock, msg, siocb->scm, flags);
out:
	netlink_rcv_wake(sk);
	return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
	BUG();
}

/*
 *	We export these functions to other modules. They provide a
 *	complete set of kernel non-blocking support for message
 *	queueing.
 */
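/*
 * Minimal usage sketch (illustrative only -- NETLINK_FOO, foo_rcv_msg and
 * foo_sk are placeholders, not part of this file):
 *
 *	static int foo_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		/- validate nlh, build a reply, return 0 or -errno -/
 *		return 0;
 *	}
 *
 *	static void foo_rcv(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &foo_rcv_msg);
 *	}
 *
 *	foo_sk = netlink_kernel_create(&init_net, NETLINK_FOO, 0,
 *				       foo_rcv, NULL, THIS_MODULE);
 *
 * netlink_kernel_create() returns the kernel-side struct sock (or NULL on
 * failure), which is then used with netlink_unicast()/netlink_broadcast()
 * and released with netlink_kernel_release().
 */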
struct sock *
netlink_kernel_create(struct net *net, int unit, unsigned int groups,
		      void (*input)(struct sk_buff *skb),
		      struct mutex *cb_mutex, struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	unsigned long *listeners = NULL;

	BUG_ON(!nl_table);

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(net, sock, cb_mutex, unit) < 0)
		goto out_sock_release;

	if (groups < 32)
		groups = 32;

	listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk = sock->sk;
	sk->sk_data_ready = netlink_data_ready;
	if (input)
		nlk_sk(sk)->netlink_rcv = input;

	if (netlink_insert(sk, net, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	netlink_table_grab();
	if (!nl_table[unit].registered) {
		nl_table[unit].groups = groups;
		nl_table[unit].listeners = listeners;
		nl_table[unit].cb_mutex = cb_mutex;
		nl_table[unit].module = module;
		nl_table[unit].registered = 1;
	} else {
		kfree(listeners);
		nl_table[unit].registered++;
	}
	netlink_table_ungrab();

	return sk;

out_sock_release:
	kfree(listeners);
	sock_release(sock);
	return NULL;
}
EXPORT_SYMBOL(netlink_kernel_create);


void
netlink_kernel_release(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;
	sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);


/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	unsigned long *listeners, *old = NULL;
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	int err = 0;

	if (groups < 32)
		groups = 32;

	netlink_table_grab();
	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
		listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC);
		if (!listeners) {
			err = -ENOMEM;
			goto out_ungrab;
		}
		old = tbl->listeners;
		memcpy(listeners, old, NLGRPSZ(tbl->groups));
		rcu_assign_pointer(tbl->listeners, listeners);
	}
	tbl->groups = groups;

out_ungrab:
	netlink_table_ungrab();
	synchronize_rcu();
	kfree(old);
	return err;
}
EXPORT_SYMBOL(netlink_change_ngroups);
/**
 * netlink_clear_multicast_users - kick off multicast listeners
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	struct sock *sk;
	struct hlist_node *node;
	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

	netlink_table_grab();

	sk_for_each_bound(sk, node, &tbl->mc_list)
		netlink_update_socket_mc(nlk_sk(sk), group, 0);

	netlink_table_ungrab();
}
EXPORT_SYMBOL(netlink_clear_multicast_users);

void netlink_set_nonroot(int protocol, unsigned int flags)
{
	if ((unsigned int)protocol < MAX_LINKS)
		nl_table[protocol].nl_nonroot = flags;
}
EXPORT_SYMBOL(netlink_set_nonroot);

static void netlink_destroy_callback(struct netlink_callback *cb)
{
	if (cb->skb)
		kfree_skb(cb->skb);
	kfree(cb);
}

/*
 * It looks a bit ugly.
 * It would be better to create a kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int len, err = -ENOBUFS;

	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
	if (!skb)
		goto errout;

	mutex_lock(nlk->cb_mutex);

	cb = nlk->cb;
	if (cb == NULL) {
		err = -EINVAL;
		goto errout_skb;
	}

	len = cb->dump(skb, cb);

	if (len > 0) {
		mutex_unlock(nlk->cb_mutex);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, len);
		return 0;
	}

	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
	if (!nlh)
		goto errout_skb;

	memcpy(nlmsg_data(nlh), &len, sizeof(len));

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, skb->len);

	if (cb->done)
		cb->done(cb);
	nlk->cb = NULL;
	mutex_unlock(nlk->cb_mutex);

	netlink_destroy_callback(cb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
errout:
	return err;
}

int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb,
				   struct netlink_callback *),
		       int (*done)(struct netlink_callback *))
{
	struct netlink_callback *cb;
	struct sock *sk;
	struct netlink_sock *nlk;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);
	/* A dump is in progress... */
	mutex_lock(nlk->cb_mutex);
	if (nlk->cb) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		sock_put(sk);
		return -EBUSY;
	}
	nlk->cb = cb;
	mutex_unlock(nlk->cb_mutex);

	netlink_dump(sk);
	sock_put(sk);

	/* We successfully started a dump; by returning -EINTR we
	 * signal the caller not to send an ACK even if one was requested.
	 */
	return -EINTR;
}
EXPORT_SYMBOL(netlink_dump_start);
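/*
 * A protocol's message handler typically hands NLM_F_DUMP requests straight
 * to netlink_dump_start(), e.g. (illustrative sketch; foo_sk, foo_dump and
 * foo_done are placeholders):
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP)
 *		return netlink_dump_start(foo_sk, skb, nlh,
 *					  foo_dump, foo_done);
 *
 * The -EINTR return propagates back through the handler to
 * netlink_rcv_skb(), which treats it as "dump started, skip the ACK";
 * subsequent chunks are produced from netlink_recvmsg() as the reader
 * drains its queue.
 */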
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	size_t payload = sizeof(*errmsg);

	/* error messages get the original request appended */
	if (err)
		payload += nlmsg_len(nlh);

	skb = nlmsg_new(payload, GFP_KERNEL);
	if (!skb) {
		struct sock *sk;

		sk = netlink_lookup(in_skb->sk->sk_net,
				    in_skb->sk->sk_protocol,
				    NETLINK_CB(in_skb).pid);
		if (sk) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
			sock_put(sk);
		}
		return;
	}

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
	errmsg = nlmsg_data(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);

int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *))
{
	struct nlmsghdr *nlh;
	int err;

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
			goto ack;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto ack;

		err = cb(skb, nlh);
		if (err == -EINTR)
			goto skip;

ack:
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err);

skip:
		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @pid: destination netlink pid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_pid = 0;

		if (report) {
			atomic_inc(&skb->users);
			exclude_pid = pid;
		}

		/* errors reported via destination sk->sk_err */
		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
	}

	if (report)
		err = nlmsg_unicast(sk, skb, pid);

	return err;
}
EXPORT_SYMBOL(nlmsg_notify);
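/*
 * nlmsg_notify() summary by example: with group != 0 and report == 1 the
 * message is multicast to the group excluding @pid and additionally unicast
 * to @pid; with report == 0 it is only multicast; with group == 0 and
 * report == 1 it is only unicast to @pid.
 */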
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
	struct seq_net_private p;
	int link;
	int hash_idx;
};

static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
	struct nl_seq_iter *iter = seq->private;
	int i, j;
	struct sock *s;
	struct hlist_node *node;
	loff_t off = 0;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (j = 0; j <= hash->mask; j++) {
			sk_for_each(s, node, &hash->table[j]) {
				if (iter->p.net != s->sk_net)
					continue;
				if (off == pos) {
					iter->link = i;
					iter->hash_idx = j;
					return s;
				}
				++off;
			}
		}
	}
	return NULL;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(nl_table_lock)
{
	read_lock(&nl_table_lock);
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *s;
	struct nl_seq_iter *iter;
	int i, j;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return netlink_seq_socket_idx(seq, 0);

	iter = seq->private;
	s = v;
	do {
		s = sk_next(s);
	} while (s && (iter->p.net != s->sk_net));
	if (s)
		return s;

	i = iter->link;
	j = iter->hash_idx + 1;

	do {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (; j <= hash->mask; j++) {
			s = sk_head(&hash->table[j]);
			while (s && (iter->p.net != s->sk_net))
				s = sk_next(s);
			if (s) {
				iter->link = i;
				iter->hash_idx = j;
				return s;
			}
		}

		j = 0;
	} while (++i < MAX_LINKS);

	return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
	__releases(nl_table_lock)
{
	read_unlock(&nl_table_lock);
}


static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq,
			 "sk       Eth Pid    Groups   "
			 "Rmem     Wmem     Dump     Locks\n");
	else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
			   s,
			   s->sk_protocol,
			   nlk->pid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   atomic_read(&s->sk_rmem_alloc),
			   atomic_read(&s->sk_wmem_alloc),
			   nlk->cb,
			   atomic_read(&s->sk_refcnt)
			);

	}
	return 0;
}

static const struct seq_operations netlink_seq_ops = {
	.start = netlink_seq_start,
	.next = netlink_seq_next,
	.stop = netlink_seq_stop,
	.show = netlink_seq_show,
};


static int netlink_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &netlink_seq_ops,
			    sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
	.owner = THIS_MODULE,
	.open = netlink_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
	.family = PF_NETLINK,
	.owner = THIS_MODULE,
	.release = netlink_release,
	.bind = netlink_bind,
	.connect = netlink_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = netlink_getname,
	.poll = datagram_poll,
	.ioctl = sock_no_ioctl,
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.setsockopt = netlink_setsockopt,
	.getsockopt = netlink_getsockopt,
	.sendmsg = netlink_sendmsg,
	.recvmsg = netlink_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
};
static struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
		return -ENOMEM;
#endif
	return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "netlink");
#endif
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
	struct sk_buff *dummy_skb;
	int i;
	unsigned long limit;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	if (num_physpages >= (128 * 1024))
		limit = num_physpages >> (21 - PAGE_SHIFT);
	else
		limit = num_physpages >> (23 - PAGE_SHIFT);

	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
	limit = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			while (i-- > 0)
				nl_pid_hash_free(nl_table[i].hash.table,
						 1 * sizeof(*hash->table));
			kfree(nl_table);
			goto panic;
		}
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);