/*
 * NETLINK	Kernel-user communication protocol.
 *
 *		Authors:	Alan Cox <alan@redhat.com>
 *				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/selinux.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
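
/*
 * For illustration: with 64-bit unsigned long, NLGRPSZ(32) is
 * ALIGN(32, 64) / 8 == 8 bytes and NLGRPLONGS(32) == 1, i.e. the default
 * 32 multicast groups fit in a single unsigned long; a family asking for,
 * say, 100 groups would get NLGRPSZ(100) == 16 bytes and
 * NLGRPLONGS(100) == 2 longs for its membership and listener bitmaps.
 */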

struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock		sk;
	u32			pid;
	u32			dst_pid;
	u32			dst_group;
	u32			flags;
	u32			subscriptions;
	u32			ngroups;
	unsigned long		*groups;
	unsigned long		state;
	wait_queue_head_t	wait;
	struct netlink_callback	*cb;
	struct mutex		*cb_mutex;
	struct mutex		cb_def_mutex;
	void			(*netlink_rcv)(struct sk_buff *skb);
	struct module		*module;
};

#define NETLINK_KERNEL_SOCKET	0x1
#define NETLINK_RECV_PKTINFO	0x2

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
	struct hlist_head *table;
	unsigned long rehash_time;

	unsigned int mask;
	unsigned int shift;

	unsigned int entries;
	unsigned int max_shift;

	u32 rnd;
};

struct netlink_table {
	struct nl_pid_hash hash;
	struct hlist_head mc_list;
	unsigned long *listeners;
	unsigned int nl_nonroot;
	unsigned int groups;
	struct mutex *cb_mutex;
	struct module *module;
	int registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static u32 netlink_group_mask(u32 group)
{
	return group ? 1 << (group - 1) : 0;
}

static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}

static void netlink_sock_destruct(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->cb) {
		if (nlk->cb->done)
			nlk->cb->done(nlk->cb);
		netlink_destroy_callback(nlk->cb);
	}

	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Freeing alive netlink socket %p\n", sk);
		return;
	}
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
	BUG_TRAP(!nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
 * Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

static void netlink_table_grab(void)
{
	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

static __inline__ void netlink_table_ungrab(void)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

static __inline__ void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}

static __inline__ void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}

static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[protocol].hash;
	struct hlist_head *head;
	struct sock *sk;
	struct hlist_node *node;

	read_lock(&nl_table_lock);
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(sk, node, head) {
		if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) {
			sock_hold(sk);
			goto found;
		}
	}
	sk = NULL;
found:
	read_unlock(&nl_table_lock);
	return sk;
}

static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
{
	if (size <= PAGE_SIZE)
		return kmalloc(size, GFP_ATOMIC);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_ATOMIC, get_order(size));
}

static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(table);
	else
		free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
	unsigned int omask, mask, shift;
	size_t osize, size;
	struct hlist_head *otable, *table;
	int i;

	omask = mask = hash->mask;
	osize = size = (mask + 1) * sizeof(*table);
	shift = hash->shift;

	if (grow) {
		if (++shift > hash->max_shift)
			return 0;
		mask = mask * 2 + 1;
		size *= 2;
	}

	table = nl_pid_hash_alloc(size);
	if (!table)
		return 0;

	memset(table, 0, size);
	otable = hash->table;
	hash->table = table;
	hash->mask = mask;
	hash->shift = shift;
	get_random_bytes(&hash->rnd, sizeof(hash->rnd));

	for (i = 0; i <= omask; i++) {
		struct sock *sk;
		struct hlist_node *node, *tmp;

		sk_for_each_safe(sk, node, tmp, &otable[i])
			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
	}

	nl_pid_hash_free(otable, osize);
	hash->rehash_time = jiffies + 10 * 60 * HZ;
	return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
	int avg = hash->entries >> hash->shift;

	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
		return 1;

	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
		nl_pid_hash_rehash(hash, 0);
		return 1;
	}

	return 0;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	struct hlist_node *node;
	unsigned long mask;
	unsigned int i;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, node, &tbl->mc_list) {
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		tbl->listeners[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return.
	 */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	int err = -EADDRINUSE;
	struct sock *osk;
	struct hlist_node *node;
	int len;

	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	len = 0;
	sk_for_each(osk, node, head) {
		if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid))
			break;
		len++;
	}
	if (node)
		goto err;

	err = -EBUSY;
	if (nlk_sk(sk)->pid)
		goto err;

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
		goto err;

	if (len && nl_pid_hash_dilute(hash, len))
		head = nl_pid_hashfn(hash, pid);
	hash->entries++;
	nlk_sk(sk)->pid = pid;
	sk_add_node(sk, head);
	err = 0;

err:
	netlink_table_ungrab();
	return err;
}

static void netlink_remove(struct sock *sk)
{
	netlink_table_grab();
	if (sk_del_node_init(sk))
		nl_table[sk->sk_protocol].hash.entries--;
	if (nlk_sk(sk)->subscriptions)
		__sk_del_bind_node(sk);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex)
		nlk->cb_mutex = cb_mutex;
	else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	netlink_lock_table();
#ifdef CONFIG_KMOD
	if (!nl_table[protocol].registered) {
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	cb_mutex = nl_table[protocol].cb_mutex;
	netlink_unlock_table();

	if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0)
		goto out_module;

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}

static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */

	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);

	skb_queue_purge(&sk->sk_write_queue);

	if (nlk->pid && !nlk->subscriptions) {
		struct netlink_notify n = {
						.net = sk->sk_net,
						.protocol = sk->sk_protocol,
						.pid = nlk->pid,
					  };
		atomic_notifier_call_chain(&netlink_chain,
				NETLINK_URELEASE, &n);
	}

	module_put(nlk->module);

	netlink_table_grab();
	if (netlink_is_kernel(sk)) {
		kfree(nl_table[sk->sk_protocol].listeners);
		nl_table[sk->sk_protocol].module = NULL;
		nl_table[sk->sk_protocol].registered = 0;
	} else if (nlk->subscriptions)
		netlink_update_listeners(sk);
	netlink_table_ungrab();

	kfree(nlk->groups);
	nlk->groups = NULL;

	sock_put(sk);
	return 0;
}

static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sk->sk_net;
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	struct sock *osk;
	struct hlist_node *node;
	s32 pid = current->tgid;
	int err;
	static s32 rover = -4097;

retry:
	cond_resched();
	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(osk, node, head) {
		if ((osk->sk_net != net))
			continue;
		if (nlk_sk(osk)->pid == pid) {
			/* Bind collision, search negative pid values. */
			pid = rover--;
			if (rover > -4097)
				rover = -4097;
			netlink_table_ungrab();
			goto retry;
		}
	}
	netlink_table_ungrab();

	err = netlink_insert(sk, net, pid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine. */
	if (err == -EBUSY)
		err = 0;

	return err;
}

static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
	       capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
out_unlock:
	netlink_table_ungrab();
	return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sk->sk_net;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen to multicasts */
	if (nladdr->nl_groups) {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	if (nlk->pid) {
		if (nladdr->nl_pid != nlk->pid)
			return -EINVAL;
	} else {
		err = nladdr->nl_pid ?
			netlink_insert(sk, net, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(nladdr->nl_groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;
}
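
/*
 * The bind path above is driven by an ordinary bind(2) from userspace.
 * A minimal sketch (illustrative only; it assumes the NETLINK_ROUTE
 * family and the RTMGRP_LINK group from <linux/rtnetlink.h>):
 *
 *	struct sockaddr_nl sa;
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *	memset(&sa, 0, sizeof(sa));
 *	sa.nl_family = AF_NETLINK;
 *	sa.nl_pid = 0;			// 0 lets netlink_autobind() pick a pid
 *	sa.nl_groups = RTMGRP_LINK;	// bitmask form, limited to groups 1-32
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * Joining groups above 32 is done with NETLINK_ADD_MEMBERSHIP instead
 * (see netlink_setsockopt() below).
 */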

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state	= NETLINK_UNCONNECTED;
		nlk->dst_pid	= 0;
		nlk->dst_group	= 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
		return -EPERM;

	if (!nlk->pid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state	= NETLINK_CONNECTED;
		nlk->dst_pid	= nladdr->nl_pid;
		nlk->dst_group	= ffs(nladdr->nl_groups);
	}

	return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_pid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->pid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}

static void netlink_overrun(struct sock *sk)
{
	if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
		sk->sk_err = ENOBUFS;
		sk->sk_error_report(sk);
	}
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_pid != nlk_sk(ssk)->pid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination, just all
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
		      long timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    test_bit(0, &nlk->state)) {
		DECLARE_WAITQUEUE(wait, current);
		if (!timeo) {
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(0, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			timeo = schedule_timeout(timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(timeo);
		}
		return 1;
	}
	skb_set_owner_r(skb, sk);
	return 0;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, len);
	sock_put(sk);
	return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}

static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
					   gfp_t allocation)
{
	int delta;

	skb_orphan(skb);

	delta = skb->end - skb->tail;
	if (delta * 2 < skb->truesize)
		return skb;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		kfree_skb(skb);
		skb = nskb;
	}

	if (!pskb_expand_head(skb, 0, -delta, allocation))
		skb->truesize -= delta;

	return skb;
}

static inline void netlink_rcv_wake(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (skb_queue_empty(&sk->sk_receive_queue))
		clear_bit(0, &nlk->state);
	if (!test_bit(0, &nlk->state))
		wake_up_interruptible(&nlk->wait);
}

static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		skb_set_owner_r(skb, sk);
		nlk->netlink_rcv(skb);
	}
	kfree_skb(skb);
	sock_put(sk);
	return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 pid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbypid(ssk, pid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb);

	err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
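
/*
 * A typical in-kernel caller builds a reply with the nlmsg_*() helpers
 * from <net/netlink.h> and hands it to netlink_unicast() (or the
 * nlmsg_unicast() wrapper). A minimal sketch, assuming a hypothetical
 * kernel socket my_sk, message type MY_MSG_TYPE and a request skb
 * in_skb whose sender should receive the answer:
 *
 *	static int my_send_reply(struct sock *my_sk, struct sk_buff *in_skb,
 *				 struct nlmsghdr *req, u32 value)
 *	{
 *		struct sk_buff *skb;
 *		struct nlmsghdr *nlh;
 *
 *		skb = nlmsg_new(sizeof(u32), GFP_KERNEL);
 *		if (!skb)
 *			return -ENOMEM;
 *		nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, req->nlmsg_seq,
 *				MY_MSG_TYPE, sizeof(u32), 0);
 *		if (!nlh) {
 *			kfree_skb(skb);
 *			return -EMSGSIZE;
 *		}
 *		*(u32 *)nlmsg_data(nlh) = value;
 *		nlmsg_end(skb, nlh);
 *		return nlmsg_unicast(my_sk, skb, NETLINK_CB(in_skb).pid);
 *	}
 */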

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	unsigned long *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	if (group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(0, &nlk->state)) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, skb->len);
		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
	}
	return -1;
}

struct netlink_broadcast_data {
	struct sock *exclude_sk;
	struct net *net;
	u32 pid;
	u32 group;
	int failure;
	int congested;
	int delivered;
	gfp_t allocation;
	struct sk_buff *skb, *skb2;
};

static inline int do_one_broadcast(struct sock *sk,
				   struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if ((sk->sk_net != p->net))
		goto out;

	if (p->failure) {
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		netlink_overrun(sk);
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
	sock_put(sk);

out:
	return 0;
}

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		      u32 group, gfp_t allocation)
{
	struct net *net = ssk->sk_net;
	struct netlink_broadcast_data info;
	struct hlist_node *node;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.pid = pid;
	info.group = group;
	info.failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	kfree_skb(skb);

	netlink_unlock_table();

	if (info.skb2)
		kfree_skb(info.skb2);

	if (info.delivered) {
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	if (info.failure)
		return -ENOBUFS;
	return -ESRCH;
}
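
/*
 * In-kernel producers of multicast notifications usually pair the two
 * helpers above: skip building the message when nobody listens, then
 * broadcast it. A sketch, assuming a hypothetical kernel socket my_sk,
 * group MY_GROUP and a build_notification() helper:
 *
 *	if (netlink_has_listeners(my_sk, MY_GROUP)) {
 *		struct sk_buff *skb = build_notification(GFP_KERNEL);
 *
 *		if (skb)
 *			netlink_broadcast(my_sk, skb, 0, MY_GROUP,
 *					  GFP_KERNEL);
 *	}
 *
 * netlink_broadcast() consumes the skb and returns -ESRCH when no socket
 * was subscribed, so the listener check is purely an optimization.
 */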

struct netlink_set_err_data {
	struct sock *exclude_sk;
	u32 pid;
	u32 group;
	int code;
};

static inline int do_one_set_err(struct sock *sk,
				 struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (sk == p->exclude_sk)
		goto out;

	if (sk->sk_net != p->exclude_sk->sk_net)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	sk->sk_err = p->code;
	sk->sk_error_report(sk);
out:
	return 0;
}

void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct hlist_node *node;
	struct sock *sk;

	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	info.code = code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
}

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
				     unsigned int group,
				     int is_new)
{
	int old, new = !!is_new, subscriptions;

	old = test_bit(group - 1, nlk->groups);
	subscriptions = nlk->subscriptions - old + new;
	if (new)
		__set_bit(group - 1, nlk->groups);
	else
		__clear_bit(group - 1, nlk->groups);
	netlink_update_subscriptions(&nlk->sk, subscriptions);
	netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int val = 0;
	int err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optlen >= sizeof(int) &&
	    get_user(val, (unsigned int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		netlink_update_socket_mc(nlk, val,
					 optname == NETLINK_ADD_MEMBERSHIP);
		netlink_table_ungrab();
		err = 0;
		break;
	}
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}
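
/*
 * From userspace, the membership options above are exercised with a plain
 * setsockopt(2) on an AF_NETLINK socket; unlike the nl_groups bitmask
 * passed to bind(2), this works for group numbers above 32. A sketch
 * (illustrative only; the group number is hypothetical):
 *
 *	unsigned int grp = 33;
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 *	...
 *	setsockopt(fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 */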

static int netlink_getsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int len, val, err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct nl_pktinfo info;

	info.group = NETLINK_CB(skb).dst_group;
	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *addr = msg->msg_name;
	u32 dst_pid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	struct scm_cookie scm;

	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	if (NULL == siocb->scm)
		siocb->scm = &scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	if (msg->msg_namelen) {
		if (addr->nl_family != AF_NETLINK)
			return -EINVAL;
		dst_pid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
			return -EPERM;
	} else {
		dst_pid = nlk->dst_pid;
		dst_group = nlk->dst_group;
	}

	if (!nlk->pid) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	skb = alloc_skb(len, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	NETLINK_CB(skb).pid	= nlk->pid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
	selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

	/* What can I do? Netlink is asynchronous, so that
	   we will have to save current capabilities to
	   check them, when this message will be delivered
	   to corresponding kernel module.   --ANK (980802)
	 */

	err = -EFAULT;
	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
		kfree_skb(skb);
		goto out;
	}

	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	if (dst_group) {
		atomic_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
	}
	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags & MSG_DONTWAIT);

out:
	return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len,
			   int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb;
	int err;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb_reset_transport_header(skb);
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pad	= 0;
		addr->nl_pid	= NETLINK_CB(skb).pid;
		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);

	if (NULL == siocb->scm) {
		memset(&scm, 0, sizeof(scm));
		siocb->scm = &scm;
	}
	siocb->scm->creds = *NETLINK_CREDS(skb);
	if (flags & MSG_TRUNC)
		copied = skb->len;
	skb_free_datagram(sk, skb);

	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
		netlink_dump(sk);

	scm_recv(sock, msg, siocb->scm, flags);
out:
	netlink_rcv_wake(sk);
	return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
	BUG();
}

/*
 * We export these functions to other modules. They provide a
 * complete set of kernel non-blocking support for message
 * queueing.
 */

struct sock *
netlink_kernel_create(struct net *net, int unit, unsigned int groups,
		      void (*input)(struct sk_buff *skb),
		      struct mutex *cb_mutex, struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	unsigned long *listeners = NULL;

	BUG_ON(!nl_table);

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(net, sock, cb_mutex, unit) < 0)
		goto out_sock_release;

	if (groups < 32)
		groups = 32;

	listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk = sock->sk;
	sk->sk_data_ready = netlink_data_ready;
	if (input)
		nlk_sk(sk)->netlink_rcv = input;

	if (netlink_insert(sk, net, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	netlink_table_grab();
	if (!nl_table[unit].registered) {
		nl_table[unit].groups = groups;
		nl_table[unit].listeners = listeners;
		nl_table[unit].cb_mutex = cb_mutex;
		nl_table[unit].module = module;
		nl_table[unit].registered = 1;
	} else {
		kfree(listeners);
	}
	netlink_table_ungrab();

	return sk;

out_sock_release:
	kfree(listeners);
	sock_release(sock);
	return NULL;
}
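
/*
 * A kernel user of this interface typically creates its socket at init
 * time and feeds received skbs through netlink_rcv_skb(). A minimal
 * sketch, assuming a hypothetical per-message handler my_rcv_msg() and
 * the NETLINK_USERSOCK protocol number (any unused unit would do):
 *
 *	static struct sock *my_nl_sk;
 *
 *	static int my_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		// dispatch on nlh->nlmsg_type, return 0 or -errno
 *		return 0;
 *	}
 *
 *	static void my_nl_input(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_rcv_msg);
 *	}
 *
 *	static int __init my_init(void)
 *	{
 *		my_nl_sk = netlink_kernel_create(&init_net, NETLINK_USERSOCK,
 *						 0, my_nl_input, NULL,
 *						 THIS_MODULE);
 *		return my_nl_sk ? 0 : -ENOMEM;
 *	}
 */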

/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	unsigned long *listeners, *old = NULL;
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	int err = 0;

	if (groups < 32)
		groups = 32;

	netlink_table_grab();
	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
		listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC);
		if (!listeners) {
			err = -ENOMEM;
			goto out_ungrab;
		}
		old = tbl->listeners;
		memcpy(listeners, old, NLGRPSZ(tbl->groups));
		rcu_assign_pointer(tbl->listeners, listeners);
	}
	tbl->groups = groups;

out_ungrab:
	netlink_table_ungrab();
	synchronize_rcu();
	kfree(old);
	return err;
}
EXPORT_SYMBOL(netlink_change_ngroups);

/**
 * netlink_clear_multicast_users - kick off multicast listeners
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	struct sock *sk;
	struct hlist_node *node;
	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

	netlink_table_grab();

	sk_for_each_bound(sk, node, &tbl->mc_list)
		netlink_update_socket_mc(nlk_sk(sk), group, 0);

	netlink_table_ungrab();
}
EXPORT_SYMBOL(netlink_clear_multicast_users);

void netlink_set_nonroot(int protocol, unsigned int flags)
{
	if ((unsigned int)protocol < MAX_LINKS)
		nl_table[protocol].nl_nonroot = flags;
}

static void netlink_destroy_callback(struct netlink_callback *cb)
{
	if (cb->skb)
		kfree_skb(cb->skb);
	kfree(cb);
}

/*
 * It looks a bit ugly.
 * It would be better to create a kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int len, err = -ENOBUFS;

	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
	if (!skb)
		goto errout;

	mutex_lock(nlk->cb_mutex);

	cb = nlk->cb;
	if (cb == NULL) {
		err = -EINVAL;
		goto errout_skb;
	}

	len = cb->dump(skb, cb);

	if (len > 0) {
		mutex_unlock(nlk->cb_mutex);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, len);
		return 0;
	}

	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
	if (!nlh)
		goto errout_skb;

	memcpy(nlmsg_data(nlh), &len, sizeof(len));

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, skb->len);

	if (cb->done)
		cb->done(cb);
	nlk->cb = NULL;
	mutex_unlock(nlk->cb_mutex);

	netlink_destroy_callback(cb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
errout:
	return err;
}

int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb, struct netlink_callback *),
		       int (*done)(struct netlink_callback *))
{
	struct netlink_callback *cb;
	struct sock *sk;
	struct netlink_sock *nlk;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);
	/* A dump is in progress... */
	mutex_lock(nlk->cb_mutex);
	if (nlk->cb) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		sock_put(sk);
		return -EBUSY;
	}
	nlk->cb = cb;
	mutex_unlock(nlk->cb_mutex);

	netlink_dump(sk);
	sock_put(sk);
	return 0;
}
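
/*
 * Families that support NLM_F_DUMP requests normally call
 * netlink_dump_start() from their request handler and let this code drive
 * the iteration; rtnetlink does exactly that. A sketch, assuming a
 * hypothetical my_nl_sk socket and my_dump() callback that appends
 * messages until it runs out of data:
 *
 *	static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
 *	{
 *		// fill skb with as many messages as fit, using cb->args[]
 *		// as the cursor; return skb->len while more data remains,
 *		// and 0 once the dump is complete.
 *		return 0;
 *	}
 *
 *	static int my_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		if (nlh->nlmsg_flags & NLM_F_DUMP)
 *			return netlink_dump_start(my_nl_sk, skb, nlh,
 *						  my_dump, NULL);
 *		...
 *	}
 */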

void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	size_t payload = sizeof(*errmsg);

	/* error messages get the original request appended */
	if (err)
		payload += nlmsg_len(nlh);

	skb = nlmsg_new(payload, GFP_KERNEL);
	if (!skb) {
		struct sock *sk;

		sk = netlink_lookup(in_skb->sk->sk_net,
				    in_skb->sk->sk_protocol,
				    NETLINK_CB(in_skb).pid);
		if (sk) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
			sock_put(sk);
		}
		return;
	}

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
	errmsg = nlmsg_data(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}

int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *))
{
	struct nlmsghdr *nlh;
	int err;

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
			goto skip;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto skip;

		err = cb(skb, nlh);
skip:
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err);

		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @pid: destination netlink pid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_pid = 0;

		if (report) {
			atomic_inc(&skb->users);
			exclude_pid = pid;
		}

		/* errors reported via destination sk->sk_err */
		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
	}

	if (report)
		err = nlmsg_unicast(sk, skb, pid);

	return err;
}
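
/*
 * nlmsg_notify() is the usual way to fan out an event: multicast it to
 * the interested group and, when the request carried NLM_F_ECHO, also
 * unicast a copy back to the requester. A sketch in the style of
 * rtnetlink, assuming a hypothetical my_nl_sk socket and MY_GROUP:
 *
 *	err = nlmsg_notify(my_nl_sk, skb, NETLINK_CB(in_skb).pid, MY_GROUP,
 *			   nlh->nlmsg_flags & NLM_F_ECHO, GFP_KERNEL);
 */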

#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
	struct net *net;
	int link;
	int hash_idx;
};

static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
	struct nl_seq_iter *iter = seq->private;
	int i, j;
	struct sock *s;
	struct hlist_node *node;
	loff_t off = 0;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (j = 0; j <= hash->mask; j++) {
			sk_for_each(s, node, &hash->table[j]) {
				if (iter->net != s->sk_net)
					continue;
				if (off == pos) {
					iter->link = i;
					iter->hash_idx = j;
					return s;
				}
				++off;
			}
		}
	}
	return NULL;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&nl_table_lock);
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *s;
	struct nl_seq_iter *iter;
	int i, j;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return netlink_seq_socket_idx(seq, 0);

	iter = seq->private;
	s = v;
	do {
		s = sk_next(s);
	} while (s && (iter->net != s->sk_net));
	if (s)
		return s;

	i = iter->link;
	j = iter->hash_idx + 1;

	do {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (; j <= hash->mask; j++) {
			s = sk_head(&hash->table[j]);
			while (s && (iter->net != s->sk_net))
				s = sk_next(s);
			if (s) {
				iter->link = i;
				iter->hash_idx = j;
				return s;
			}
		}

		j = 0;
	} while (++i < MAX_LINKS);

	return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&nl_table_lock);
}

static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq,
			 "sk       Eth Pid    Groups   "
			 "Rmem     Wmem     Dump     Locks\n");
	else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
			   s,
			   s->sk_protocol,
			   nlk->pid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   atomic_read(&s->sk_rmem_alloc),
			   atomic_read(&s->sk_wmem_alloc),
			   nlk->cb,
			   atomic_read(&s->sk_refcnt)
			);

	}
	return 0;
}

static const struct seq_operations netlink_seq_ops = {
	.start  = netlink_seq_start,
	.next   = netlink_seq_next,
	.stop   = netlink_seq_stop,
	.show   = netlink_seq_show,
};

static int netlink_seq_open(struct inode *inode, struct file *file)
{
	struct nl_seq_iter *iter;

	iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter));
	if (!iter)
		return -ENOMEM;

	iter->net = get_proc_net(inode);
	if (!iter->net) {
		seq_release_private(inode, file);
		return -ENXIO;
	}

	return 0;
}

static int netlink_seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct nl_seq_iter *iter = seq->private;
	put_net(iter->net);
	return seq_release_private(inode, file);
}

static const struct file_operations netlink_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= netlink_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= netlink_seq_release,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}

int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}

static const struct proto_ops netlink_ops = {
	.family =	PF_NETLINK,
	.owner =	THIS_MODULE,
	.release =	netlink_release,
	.bind =		netlink_bind,
	.connect =	netlink_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	netlink_getname,
	.poll =		datagram_poll,
	.ioctl =	sock_no_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	netlink_setsockopt,
	.getsockopt =	netlink_getsockopt,
	.sendmsg =	netlink_sendmsg,
	.recvmsg =	netlink_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
		return -ENOMEM;
#endif
	return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "netlink");
#endif
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
	struct sk_buff *dummy_skb;
	int i;
	unsigned long limit;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	if (num_physpages >= (128 * 1024))
		limit = num_physpages >> (21 - PAGE_SHIFT);
	else
		limit = num_physpages >> (23 - PAGE_SHIFT);

	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
	limit = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			while (i-- > 0)
				nl_pid_hash_free(nl_table[i].hash.table,
						 1 * sizeof(*hash->table));
			kfree(nl_table);
			goto panic;
		}
		memset(hash->table, 0, 1 * sizeof(*hash->table));
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;

panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);

EXPORT_SYMBOL(netlink_ack);
EXPORT_SYMBOL(netlink_rcv_skb);
EXPORT_SYMBOL(netlink_broadcast);
EXPORT_SYMBOL(netlink_dump_start);
EXPORT_SYMBOL(netlink_kernel_create);
EXPORT_SYMBOL(netlink_register_notifier);
EXPORT_SYMBOL(netlink_set_nonroot);
EXPORT_SYMBOL(netlink_unicast);
EXPORT_SYMBOL(netlink_unregister_notifier);
EXPORT_SYMBOL(nlmsg_notify);