/*
 * NETLINK	Kernel-user communication protocol.
 *
 * Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *	added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *	use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *	- inc module use count of module that owns
 *	  the kernel socket in case userspace opens
 *	  socket of same protocol
 *	- remove all module support, since netlink is
 *	  mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#include "af_netlink.h"

struct listeners {
	struct rcu_head		rcu;
	unsigned long		masks[0];
};

/* state bits */
#define NETLINK_CONGESTED	0x0

/* flags */
#define NETLINK_KERNEL_SOCKET	0x1
#define NETLINK_RECV_PKTINFO	0x2
#define NETLINK_BROADCAST_SEND_ERROR	0x4
#define NETLINK_RECV_NO_ENOBUFS	0x8

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct netlink_table *nl_table;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);

DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static inline u32 netlink_group_mask(u32 group)
{
	return group ? 1 << (group - 1) : 0;
}

static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
{
	return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}

static void netlink_overrun(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
		if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
		}
	}
	atomic_inc(&sk->sk_drops);
}

static void netlink_rcv_wake(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (skb_queue_empty(&sk->sk_receive_queue))
		clear_bit(NETLINK_CONGESTED, &nlk->state);
	if (!test_bit(NETLINK_CONGESTED, &nlk->state))
		wake_up_interruptible(&nlk->wait);
}

#ifdef CONFIG_NETLINK_MMAP
static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
{
	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
}

static bool netlink_rx_is_mmaped(struct sock *sk)
{
	return nlk_sk(sk)->rx_ring.pg_vec != NULL;
}

static bool netlink_tx_is_mmaped(struct sock *sk)
{
	return nlk_sk(sk)->tx_ring.pg_vec != NULL;
}

static __pure struct page *pgvec_to_page(const void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	else
		return virt_to_page(addr);
}

static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++) {
		if (pg_vec[i] != NULL) {
			if (is_vmalloc_addr(pg_vec[i]))
				vfree(pg_vec[i]);
			else
				free_pages((unsigned long)pg_vec[i], order);
		}
	}
	kfree(pg_vec);
}

static void *alloc_one_pg_vec_page(unsigned long order)
{
	void *buffer;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
			  __GFP_NOWARN | __GFP_NORETRY;

	buffer = (void *)__get_free_pages(gfp_flags, order);
	if (buffer != NULL)
		return buffer;

	buffer = vzalloc((1 << order) * PAGE_SIZE);
	if (buffer != NULL)
		return buffer;

	gfp_flags &= ~__GFP_NORETRY;
	return (void *)__get_free_pages(gfp_flags, order);
}

static void **alloc_pg_vec(struct netlink_sock *nlk,
			   struct nl_mmap_req *req, unsigned int order)
{
	unsigned int block_nr = req->nm_block_nr;
	unsigned int i;
	void **pg_vec, *ptr;

	pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
	if (pg_vec == NULL)
		return NULL;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
		if (pg_vec[i] == NULL)
			goto err1;
	}

	return pg_vec;
err1:
	free_pg_vec(pg_vec, order, block_nr);
	return NULL;
}

static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
			    bool closing, bool tx_ring)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_ring *ring;
	struct sk_buff_head *queue;
	void **pg_vec = NULL;
	unsigned int order = 0;
	int err;

	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
	queue = tx_ring ?
		&sk->sk_write_queue : &sk->sk_receive_queue;

	if (!closing) {
		if (atomic_read(&nlk->mapped))
			return -EBUSY;
		if (atomic_read(&ring->pending))
			return -EBUSY;
	}

	if (req->nm_block_nr) {
		if (ring->pg_vec != NULL)
			return -EBUSY;

		if ((int)req->nm_block_size <= 0)
			return -EINVAL;
		if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
			return -EINVAL;
		if (req->nm_frame_size < NL_MMAP_HDRLEN)
			return -EINVAL;
		if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
			return -EINVAL;

		ring->frames_per_block = req->nm_block_size /
					 req->nm_frame_size;
		if (ring->frames_per_block == 0)
			return -EINVAL;
		if (ring->frames_per_block * req->nm_block_nr !=
		    req->nm_frame_nr)
			return -EINVAL;

		order = get_order(req->nm_block_size);
		pg_vec = alloc_pg_vec(nlk, req, order);
		if (pg_vec == NULL)
			return -ENOMEM;
	} else {
		if (req->nm_frame_nr)
			return -EINVAL;
	}

	err = -EBUSY;
	mutex_lock(&nlk->pg_vec_lock);
	if (closing || atomic_read(&nlk->mapped) == 0) {
		err = 0;
		spin_lock_bh(&queue->lock);

		ring->frame_max		= req->nm_frame_nr - 1;
		ring->head		= 0;
		ring->frame_size	= req->nm_frame_size;
		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;

		swap(ring->pg_vec_len, req->nm_block_nr);
		swap(ring->pg_vec_order, order);
		swap(ring->pg_vec, pg_vec);

		__skb_queue_purge(queue);
		spin_unlock_bh(&queue->lock);

		WARN_ON(atomic_read(&nlk->mapped));
	}
	mutex_unlock(&nlk->pg_vec_lock);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->nm_block_nr);
	return err;
}

static void netlink_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&nlk_sk(sk)->mapped);
}

static void netlink_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&nlk_sk(sk)->mapped);
}

static const struct vm_operations_struct netlink_mmap_ops = {
	.open	= netlink_mm_open,
	.close	= netlink_mm_close,
};

static int netlink_mmap(struct file *file, struct socket *sock,
			struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_ring *ring;
	unsigned long start, size, expected;
	unsigned int i;
	int err = -EINVAL;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&nlk->pg_vec_lock);

	expected = 0;
	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
		if (ring->pg_vec == NULL)
			continue;
		expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
	}

	if (expected == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected)
		goto out;

	start = vma->vm_start;
	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
		if (ring->pg_vec == NULL)
			continue;

		for (i = 0; i < ring->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = ring->pg_vec[i];
			unsigned int pg_num;

			for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
				page = pgvec_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (err < 0)
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&nlk->mapped);
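	/* Publish vm_ops so that subsequent fork()/munmap() of this mapping
	 * keep ->mapped in sync via netlink_mm_open()/netlink_mm_close().
	 */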
	vma->vm_ops = &netlink_mmap_ops;
	err = 0;
out:
	mutex_unlock(&nlk->pg_vec_lock);
	return err;
}

static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
{
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	struct page *p_start, *p_end;

	/* First page is flushed through netlink_{get,set}_status */
	p_start = pgvec_to_page(hdr + PAGE_SIZE);
	p_end	= pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
	while (p_start <= p_end) {
		flush_dcache_page(p_start);
		p_start++;
	}
#endif
}

static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
{
	smp_rmb();
	flush_dcache_page(pgvec_to_page(hdr));
	return hdr->nm_status;
}

static void netlink_set_status(struct nl_mmap_hdr *hdr,
			       enum nl_mmap_status status)
{
	hdr->nm_status = status;
	flush_dcache_page(pgvec_to_page(hdr));
	smp_wmb();
}

static struct nl_mmap_hdr *
__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
{
	unsigned int pg_vec_pos, frame_off;

	pg_vec_pos = pos / ring->frames_per_block;
	frame_off  = pos % ring->frames_per_block;

	return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
}

static struct nl_mmap_hdr *
netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
		     enum nl_mmap_status status)
{
	struct nl_mmap_hdr *hdr;

	hdr = __netlink_lookup_frame(ring, pos);
	if (netlink_get_status(hdr) != status)
		return NULL;

	return hdr;
}

static struct nl_mmap_hdr *
netlink_current_frame(const struct netlink_ring *ring,
		      enum nl_mmap_status status)
{
	return netlink_lookup_frame(ring, ring->head, status);
}

static struct nl_mmap_hdr *
netlink_previous_frame(const struct netlink_ring *ring,
		       enum nl_mmap_status status)
{
	unsigned int prev;

	prev = ring->head ? ring->head - 1 : ring->frame_max;
	return netlink_lookup_frame(ring, prev, status);
}

static void netlink_increment_head(struct netlink_ring *ring)
{
	ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
}

static void netlink_forward_ring(struct netlink_ring *ring)
{
	unsigned int head = ring->head, pos = head;
	const struct nl_mmap_hdr *hdr;

	do {
		hdr = __netlink_lookup_frame(ring, pos);
		if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
			break;
		if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
			break;
		netlink_increment_head(ring);
	} while (ring->head != head);
}

static bool netlink_dump_space(struct netlink_sock *nlk)
{
	struct netlink_ring *ring = &nlk->rx_ring;
	struct nl_mmap_hdr *hdr;
	unsigned int n;

	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
	if (hdr == NULL)
		return false;

	n = ring->head + ring->frame_max / 2;
	if (n > ring->frame_max)
		n -= ring->frame_max;

	hdr = __netlink_lookup_frame(ring, n);

	return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
}

static unsigned int netlink_poll(struct file *file, struct socket *sock,
				 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int mask;
	int err;

	if (nlk->rx_ring.pg_vec != NULL) {
		/* Memory mapped sockets don't call recvmsg(), so flow control
		 * for dumps is performed here. A dump is allowed to continue
		 * if at least half the ring is unused.
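		 * (netlink_dump_space() above implements that check by probing
		 * the frame half a ring ahead of the current head.)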
		 */
		while (nlk->cb != NULL && netlink_dump_space(nlk)) {
			err = netlink_dump(sk);
			if (err < 0) {
				sk->sk_err = err;
				sk->sk_error_report(sk);
				break;
			}
		}
		netlink_rcv_wake(sk);
	}

	mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (nlk->rx_ring.pg_vec) {
		netlink_forward_ring(&nlk->rx_ring);
		if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);

	spin_lock_bh(&sk->sk_write_queue.lock);
	if (nlk->tx_ring.pg_vec) {
		if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);

	return mask;
}

static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
{
	return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
}

static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
				   struct netlink_ring *ring,
				   struct nl_mmap_hdr *hdr)
{
	unsigned int size;
	void *data;

	size = ring->frame_size - NL_MMAP_HDRLEN;
	data = (void *)hdr + NL_MMAP_HDRLEN;

	skb->head	= data;
	skb->data	= data;
	skb_reset_tail_pointer(skb);
	skb->end	= skb->tail + size;
	skb->len	= 0;

	skb->destructor	= netlink_skb_destructor;
	NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
	NETLINK_CB(skb).sk = sk;
}

static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
				u32 dst_portid, u32 dst_group,
				struct sock_iocb *siocb)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_ring *ring;
	struct nl_mmap_hdr *hdr;
	struct sk_buff *skb;
	unsigned int maxlen;
	bool excl = true;
	int err = 0, len = 0;

	/* Netlink messages are validated by the receiver before processing.
	 * In order to avoid userspace changing the contents of the message
	 * after validation, the socket and the ring may only be used by a
	 * single process, otherwise we fall back to copying.
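	 * (Exclusive use is detected below from the file reference count and
	 * the number of active ring mappings.)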
573 */ 574 if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || 575 atomic_read(&nlk->mapped) > 1) 576 excl = false; 577 578 mutex_lock(&nlk->pg_vec_lock); 579 580 ring = &nlk->tx_ring; 581 maxlen = ring->frame_size - NL_MMAP_HDRLEN; 582 583 do { 584 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); 585 if (hdr == NULL) { 586 if (!(msg->msg_flags & MSG_DONTWAIT) && 587 atomic_read(&nlk->tx_ring.pending)) 588 schedule(); 589 continue; 590 } 591 if (hdr->nm_len > maxlen) { 592 err = -EINVAL; 593 goto out; 594 } 595 596 netlink_frame_flush_dcache(hdr); 597 598 if (likely(dst_portid == 0 && dst_group == 0 && excl)) { 599 skb = alloc_skb_head(GFP_KERNEL); 600 if (skb == NULL) { 601 err = -ENOBUFS; 602 goto out; 603 } 604 sock_hold(sk); 605 netlink_ring_setup_skb(skb, sk, ring, hdr); 606 NETLINK_CB(skb).flags |= NETLINK_SKB_TX; 607 __skb_put(skb, hdr->nm_len); 608 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); 609 atomic_inc(&ring->pending); 610 } else { 611 skb = alloc_skb(hdr->nm_len, GFP_KERNEL); 612 if (skb == NULL) { 613 err = -ENOBUFS; 614 goto out; 615 } 616 __skb_put(skb, hdr->nm_len); 617 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); 618 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); 619 } 620 621 netlink_increment_head(ring); 622 623 NETLINK_CB(skb).portid = nlk->portid; 624 NETLINK_CB(skb).dst_group = dst_group; 625 NETLINK_CB(skb).creds = siocb->scm->creds; 626 627 err = security_netlink_send(sk, skb); 628 if (err) { 629 kfree_skb(skb); 630 goto out; 631 } 632 633 if (unlikely(dst_group)) { 634 atomic_inc(&skb->users); 635 netlink_broadcast(sk, skb, dst_portid, dst_group, 636 GFP_KERNEL); 637 } 638 err = netlink_unicast(sk, skb, dst_portid, 639 msg->msg_flags & MSG_DONTWAIT); 640 if (err < 0) 641 goto out; 642 len += err; 643 644 } while (hdr != NULL || 645 (!(msg->msg_flags & MSG_DONTWAIT) && 646 atomic_read(&nlk->tx_ring.pending))); 647 648 if (len > 0) 649 err = len; 650 out: 651 mutex_unlock(&nlk->pg_vec_lock); 652 return err; 653 } 654 655 static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) 656 { 657 struct nl_mmap_hdr *hdr; 658 659 hdr = netlink_mmap_hdr(skb); 660 hdr->nm_len = skb->len; 661 hdr->nm_group = NETLINK_CB(skb).dst_group; 662 hdr->nm_pid = NETLINK_CB(skb).creds.pid; 663 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); 664 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); 665 netlink_frame_flush_dcache(hdr); 666 netlink_set_status(hdr, NL_MMAP_STATUS_VALID); 667 668 NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; 669 kfree_skb(skb); 670 } 671 672 static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) 673 { 674 struct netlink_sock *nlk = nlk_sk(sk); 675 struct netlink_ring *ring = &nlk->rx_ring; 676 struct nl_mmap_hdr *hdr; 677 678 spin_lock_bh(&sk->sk_receive_queue.lock); 679 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); 680 if (hdr == NULL) { 681 spin_unlock_bh(&sk->sk_receive_queue.lock); 682 kfree_skb(skb); 683 netlink_overrun(sk); 684 return; 685 } 686 netlink_increment_head(ring); 687 __skb_queue_tail(&sk->sk_receive_queue, skb); 688 spin_unlock_bh(&sk->sk_receive_queue.lock); 689 690 hdr->nm_len = skb->len; 691 hdr->nm_group = NETLINK_CB(skb).dst_group; 692 hdr->nm_pid = NETLINK_CB(skb).creds.pid; 693 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); 694 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); 695 netlink_set_status(hdr, NL_MMAP_STATUS_COPY); 696 } 697 698 #else /* CONFIG_NETLINK_MMAP */ 699 
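/* Compile-time stubs used when CONFIG_NETLINK_MMAP is not enabled. */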
#define netlink_skb_is_mmaped(skb) false 700 #define netlink_rx_is_mmaped(sk) false 701 #define netlink_tx_is_mmaped(sk) false 702 #define netlink_mmap sock_no_mmap 703 #define netlink_poll datagram_poll 704 #define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 705 #endif /* CONFIG_NETLINK_MMAP */ 706 707 static void netlink_destroy_callback(struct netlink_callback *cb) 708 { 709 kfree_skb(cb->skb); 710 kfree(cb); 711 } 712 713 static void netlink_consume_callback(struct netlink_callback *cb) 714 { 715 consume_skb(cb->skb); 716 kfree(cb); 717 } 718 719 static void netlink_skb_destructor(struct sk_buff *skb) 720 { 721 #ifdef CONFIG_NETLINK_MMAP 722 struct nl_mmap_hdr *hdr; 723 struct netlink_ring *ring; 724 struct sock *sk; 725 726 /* If a packet from the kernel to userspace was freed because of an 727 * error without being delivered to userspace, the kernel must reset 728 * the status. In the direction userspace to kernel, the status is 729 * always reset here after the packet was processed and freed. 730 */ 731 if (netlink_skb_is_mmaped(skb)) { 732 hdr = netlink_mmap_hdr(skb); 733 sk = NETLINK_CB(skb).sk; 734 735 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { 736 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); 737 ring = &nlk_sk(sk)->tx_ring; 738 } else { 739 if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { 740 hdr->nm_len = 0; 741 netlink_set_status(hdr, NL_MMAP_STATUS_VALID); 742 } 743 ring = &nlk_sk(sk)->rx_ring; 744 } 745 746 WARN_ON(atomic_read(&ring->pending) == 0); 747 atomic_dec(&ring->pending); 748 sock_put(sk); 749 750 skb->head = NULL; 751 } 752 #endif 753 if (skb->sk != NULL) 754 sock_rfree(skb); 755 } 756 757 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 758 { 759 WARN_ON(skb->sk != NULL); 760 skb->sk = sk; 761 skb->destructor = netlink_skb_destructor; 762 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 763 sk_mem_charge(sk, skb->truesize); 764 } 765 766 static void netlink_sock_destruct(struct sock *sk) 767 { 768 struct netlink_sock *nlk = nlk_sk(sk); 769 770 if (nlk->cb) { 771 if (nlk->cb->done) 772 nlk->cb->done(nlk->cb); 773 774 module_put(nlk->cb->module); 775 netlink_destroy_callback(nlk->cb); 776 } 777 778 skb_queue_purge(&sk->sk_receive_queue); 779 #ifdef CONFIG_NETLINK_MMAP 780 if (1) { 781 struct nl_mmap_req req; 782 783 memset(&req, 0, sizeof(req)); 784 if (nlk->rx_ring.pg_vec) 785 netlink_set_ring(sk, &req, true, false); 786 memset(&req, 0, sizeof(req)); 787 if (nlk->tx_ring.pg_vec) 788 netlink_set_ring(sk, &req, true, true); 789 } 790 #endif /* CONFIG_NETLINK_MMAP */ 791 792 if (!sock_flag(sk, SOCK_DEAD)) { 793 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 794 return; 795 } 796 797 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 798 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 799 WARN_ON(nlk_sk(sk)->groups); 800 } 801 802 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 803 * SMP. Look, when several writers sleep and reader wakes them up, all but one 804 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 805 * this, _but_ remember, it adds useless work on UP machines. 
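 * (Readers only take nl_table_lock briefly in netlink_lock_table() to bump
 * nl_table_users; netlink_table_grab() below then waits for that count to
 * drain to zero before proceeding.)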
806 */ 807 808 void netlink_table_grab(void) 809 __acquires(nl_table_lock) 810 { 811 might_sleep(); 812 813 write_lock_irq(&nl_table_lock); 814 815 if (atomic_read(&nl_table_users)) { 816 DECLARE_WAITQUEUE(wait, current); 817 818 add_wait_queue_exclusive(&nl_table_wait, &wait); 819 for (;;) { 820 set_current_state(TASK_UNINTERRUPTIBLE); 821 if (atomic_read(&nl_table_users) == 0) 822 break; 823 write_unlock_irq(&nl_table_lock); 824 schedule(); 825 write_lock_irq(&nl_table_lock); 826 } 827 828 __set_current_state(TASK_RUNNING); 829 remove_wait_queue(&nl_table_wait, &wait); 830 } 831 } 832 833 void netlink_table_ungrab(void) 834 __releases(nl_table_lock) 835 { 836 write_unlock_irq(&nl_table_lock); 837 wake_up(&nl_table_wait); 838 } 839 840 static inline void 841 netlink_lock_table(void) 842 { 843 /* read_lock() synchronizes us to netlink_table_grab */ 844 845 read_lock(&nl_table_lock); 846 atomic_inc(&nl_table_users); 847 read_unlock(&nl_table_lock); 848 } 849 850 static inline void 851 netlink_unlock_table(void) 852 { 853 if (atomic_dec_and_test(&nl_table_users)) 854 wake_up(&nl_table_wait); 855 } 856 857 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 858 { 859 struct nl_portid_hash *hash = &nl_table[protocol].hash; 860 struct hlist_head *head; 861 struct sock *sk; 862 863 read_lock(&nl_table_lock); 864 head = nl_portid_hashfn(hash, portid); 865 sk_for_each(sk, head) { 866 if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { 867 sock_hold(sk); 868 goto found; 869 } 870 } 871 sk = NULL; 872 found: 873 read_unlock(&nl_table_lock); 874 return sk; 875 } 876 877 static struct hlist_head *nl_portid_hash_zalloc(size_t size) 878 { 879 if (size <= PAGE_SIZE) 880 return kzalloc(size, GFP_ATOMIC); 881 else 882 return (struct hlist_head *) 883 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 884 get_order(size)); 885 } 886 887 static void nl_portid_hash_free(struct hlist_head *table, size_t size) 888 { 889 if (size <= PAGE_SIZE) 890 kfree(table); 891 else 892 free_pages((unsigned long)table, get_order(size)); 893 } 894 895 static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) 896 { 897 unsigned int omask, mask, shift; 898 size_t osize, size; 899 struct hlist_head *otable, *table; 900 int i; 901 902 omask = mask = hash->mask; 903 osize = size = (mask + 1) * sizeof(*table); 904 shift = hash->shift; 905 906 if (grow) { 907 if (++shift > hash->max_shift) 908 return 0; 909 mask = mask * 2 + 1; 910 size *= 2; 911 } 912 913 table = nl_portid_hash_zalloc(size); 914 if (!table) 915 return 0; 916 917 otable = hash->table; 918 hash->table = table; 919 hash->mask = mask; 920 hash->shift = shift; 921 get_random_bytes(&hash->rnd, sizeof(hash->rnd)); 922 923 for (i = 0; i <= omask; i++) { 924 struct sock *sk; 925 struct hlist_node *tmp; 926 927 sk_for_each_safe(sk, tmp, &otable[i]) 928 __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); 929 } 930 931 nl_portid_hash_free(otable, osize); 932 hash->rehash_time = jiffies + 10 * 60 * HZ; 933 return 1; 934 } 935 936 static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) 937 { 938 int avg = hash->entries >> hash->shift; 939 940 if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) 941 return 1; 942 943 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { 944 nl_portid_hash_rehash(hash, 0); 945 return 1; 946 } 947 948 return 0; 949 } 950 951 static const struct proto_ops netlink_ops; 952 953 static void 954 netlink_update_listeners(struct sock *sk) 955 { 956 struct 
netlink_table *tbl = &nl_table[sk->sk_protocol]; 957 unsigned long mask; 958 unsigned int i; 959 struct listeners *listeners; 960 961 listeners = nl_deref_protected(tbl->listeners); 962 if (!listeners) 963 return; 964 965 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 966 mask = 0; 967 sk_for_each_bound(sk, &tbl->mc_list) { 968 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 969 mask |= nlk_sk(sk)->groups[i]; 970 } 971 listeners->masks[i] = mask; 972 } 973 /* this function is only called with the netlink table "grabbed", which 974 * makes sure updates are visible before bind or setsockopt return. */ 975 } 976 977 static int netlink_insert(struct sock *sk, struct net *net, u32 portid) 978 { 979 struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; 980 struct hlist_head *head; 981 int err = -EADDRINUSE; 982 struct sock *osk; 983 int len; 984 985 netlink_table_grab(); 986 head = nl_portid_hashfn(hash, portid); 987 len = 0; 988 sk_for_each(osk, head) { 989 if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) 990 break; 991 len++; 992 } 993 if (osk) 994 goto err; 995 996 err = -EBUSY; 997 if (nlk_sk(sk)->portid) 998 goto err; 999 1000 err = -ENOMEM; 1001 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) 1002 goto err; 1003 1004 if (len && nl_portid_hash_dilute(hash, len)) 1005 head = nl_portid_hashfn(hash, portid); 1006 hash->entries++; 1007 nlk_sk(sk)->portid = portid; 1008 sk_add_node(sk, head); 1009 err = 0; 1010 1011 err: 1012 netlink_table_ungrab(); 1013 return err; 1014 } 1015 1016 static void netlink_remove(struct sock *sk) 1017 { 1018 netlink_table_grab(); 1019 if (sk_del_node_init(sk)) 1020 nl_table[sk->sk_protocol].hash.entries--; 1021 if (nlk_sk(sk)->subscriptions) 1022 __sk_del_bind_node(sk); 1023 netlink_table_ungrab(); 1024 } 1025 1026 static struct proto netlink_proto = { 1027 .name = "NETLINK", 1028 .owner = THIS_MODULE, 1029 .obj_size = sizeof(struct netlink_sock), 1030 }; 1031 1032 static int __netlink_create(struct net *net, struct socket *sock, 1033 struct mutex *cb_mutex, int protocol) 1034 { 1035 struct sock *sk; 1036 struct netlink_sock *nlk; 1037 1038 sock->ops = &netlink_ops; 1039 1040 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); 1041 if (!sk) 1042 return -ENOMEM; 1043 1044 sock_init_data(sock, sk); 1045 1046 nlk = nlk_sk(sk); 1047 if (cb_mutex) { 1048 nlk->cb_mutex = cb_mutex; 1049 } else { 1050 nlk->cb_mutex = &nlk->cb_def_mutex; 1051 mutex_init(nlk->cb_mutex); 1052 } 1053 init_waitqueue_head(&nlk->wait); 1054 #ifdef CONFIG_NETLINK_MMAP 1055 mutex_init(&nlk->pg_vec_lock); 1056 #endif 1057 1058 sk->sk_destruct = netlink_sock_destruct; 1059 sk->sk_protocol = protocol; 1060 return 0; 1061 } 1062 1063 static int netlink_create(struct net *net, struct socket *sock, int protocol, 1064 int kern) 1065 { 1066 struct module *module = NULL; 1067 struct mutex *cb_mutex; 1068 struct netlink_sock *nlk; 1069 void (*bind)(int group); 1070 int err = 0; 1071 1072 sock->state = SS_UNCONNECTED; 1073 1074 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 1075 return -ESOCKTNOSUPPORT; 1076 1077 if (protocol < 0 || protocol >= MAX_LINKS) 1078 return -EPROTONOSUPPORT; 1079 1080 netlink_lock_table(); 1081 #ifdef CONFIG_MODULES 1082 if (!nl_table[protocol].registered) { 1083 netlink_unlock_table(); 1084 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 1085 netlink_lock_table(); 1086 } 1087 #endif 1088 if (nl_table[protocol].registered && 1089 try_module_get(nl_table[protocol].module)) 1090 module = nl_table[protocol].module; 1091 
else 1092 err = -EPROTONOSUPPORT; 1093 cb_mutex = nl_table[protocol].cb_mutex; 1094 bind = nl_table[protocol].bind; 1095 netlink_unlock_table(); 1096 1097 if (err < 0) 1098 goto out; 1099 1100 err = __netlink_create(net, sock, cb_mutex, protocol); 1101 if (err < 0) 1102 goto out_module; 1103 1104 local_bh_disable(); 1105 sock_prot_inuse_add(net, &netlink_proto, 1); 1106 local_bh_enable(); 1107 1108 nlk = nlk_sk(sock->sk); 1109 nlk->module = module; 1110 nlk->netlink_bind = bind; 1111 out: 1112 return err; 1113 1114 out_module: 1115 module_put(module); 1116 goto out; 1117 } 1118 1119 static int netlink_release(struct socket *sock) 1120 { 1121 struct sock *sk = sock->sk; 1122 struct netlink_sock *nlk; 1123 1124 if (!sk) 1125 return 0; 1126 1127 netlink_remove(sk); 1128 sock_orphan(sk); 1129 nlk = nlk_sk(sk); 1130 1131 /* 1132 * OK. Socket is unlinked, any packets that arrive now 1133 * will be purged. 1134 */ 1135 1136 sock->sk = NULL; 1137 wake_up_interruptible_all(&nlk->wait); 1138 1139 skb_queue_purge(&sk->sk_write_queue); 1140 1141 if (nlk->portid) { 1142 struct netlink_notify n = { 1143 .net = sock_net(sk), 1144 .protocol = sk->sk_protocol, 1145 .portid = nlk->portid, 1146 }; 1147 atomic_notifier_call_chain(&netlink_chain, 1148 NETLINK_URELEASE, &n); 1149 } 1150 1151 module_put(nlk->module); 1152 1153 netlink_table_grab(); 1154 if (netlink_is_kernel(sk)) { 1155 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 1156 if (--nl_table[sk->sk_protocol].registered == 0) { 1157 struct listeners *old; 1158 1159 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); 1160 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); 1161 kfree_rcu(old, rcu); 1162 nl_table[sk->sk_protocol].module = NULL; 1163 nl_table[sk->sk_protocol].bind = NULL; 1164 nl_table[sk->sk_protocol].flags = 0; 1165 nl_table[sk->sk_protocol].registered = 0; 1166 } 1167 } else if (nlk->subscriptions) { 1168 netlink_update_listeners(sk); 1169 } 1170 netlink_table_ungrab(); 1171 1172 kfree(nlk->groups); 1173 nlk->groups = NULL; 1174 1175 local_bh_disable(); 1176 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 1177 local_bh_enable(); 1178 sock_put(sk); 1179 return 0; 1180 } 1181 1182 static int netlink_autobind(struct socket *sock) 1183 { 1184 struct sock *sk = sock->sk; 1185 struct net *net = sock_net(sk); 1186 struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; 1187 struct hlist_head *head; 1188 struct sock *osk; 1189 s32 portid = task_tgid_vnr(current); 1190 int err; 1191 static s32 rover = -4097; 1192 1193 retry: 1194 cond_resched(); 1195 netlink_table_grab(); 1196 head = nl_portid_hashfn(hash, portid); 1197 sk_for_each(osk, head) { 1198 if (!net_eq(sock_net(osk), net)) 1199 continue; 1200 if (nlk_sk(osk)->portid == portid) { 1201 /* Bind collision, search negative portid values. */ 1202 portid = rover--; 1203 if (rover > -4097) 1204 rover = -4097; 1205 netlink_table_ungrab(); 1206 goto retry; 1207 } 1208 } 1209 netlink_table_ungrab(); 1210 1211 err = netlink_insert(sk, net, portid); 1212 if (err == -EADDRINUSE) 1213 goto retry; 1214 1215 /* If 2 threads race to autobind, that is fine. 
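	 * netlink_insert() reports -EBUSY when this socket already has a
	 * portid bound, i.e. the other thread won the race.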
*/ 1216 if (err == -EBUSY) 1217 err = 0; 1218 1219 return err; 1220 } 1221 1222 static inline int netlink_capable(const struct socket *sock, unsigned int flag) 1223 { 1224 return (nl_table[sock->sk->sk_protocol].flags & flag) || 1225 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); 1226 } 1227 1228 static void 1229 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 1230 { 1231 struct netlink_sock *nlk = nlk_sk(sk); 1232 1233 if (nlk->subscriptions && !subscriptions) 1234 __sk_del_bind_node(sk); 1235 else if (!nlk->subscriptions && subscriptions) 1236 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 1237 nlk->subscriptions = subscriptions; 1238 } 1239 1240 static int netlink_realloc_groups(struct sock *sk) 1241 { 1242 struct netlink_sock *nlk = nlk_sk(sk); 1243 unsigned int groups; 1244 unsigned long *new_groups; 1245 int err = 0; 1246 1247 netlink_table_grab(); 1248 1249 groups = nl_table[sk->sk_protocol].groups; 1250 if (!nl_table[sk->sk_protocol].registered) { 1251 err = -ENOENT; 1252 goto out_unlock; 1253 } 1254 1255 if (nlk->ngroups >= groups) 1256 goto out_unlock; 1257 1258 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 1259 if (new_groups == NULL) { 1260 err = -ENOMEM; 1261 goto out_unlock; 1262 } 1263 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 1264 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 1265 1266 nlk->groups = new_groups; 1267 nlk->ngroups = groups; 1268 out_unlock: 1269 netlink_table_ungrab(); 1270 return err; 1271 } 1272 1273 static int netlink_bind(struct socket *sock, struct sockaddr *addr, 1274 int addr_len) 1275 { 1276 struct sock *sk = sock->sk; 1277 struct net *net = sock_net(sk); 1278 struct netlink_sock *nlk = nlk_sk(sk); 1279 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 1280 int err; 1281 1282 if (addr_len < sizeof(struct sockaddr_nl)) 1283 return -EINVAL; 1284 1285 if (nladdr->nl_family != AF_NETLINK) 1286 return -EINVAL; 1287 1288 /* Only superuser is allowed to listen multicasts */ 1289 if (nladdr->nl_groups) { 1290 if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) 1291 return -EPERM; 1292 err = netlink_realloc_groups(sk); 1293 if (err) 1294 return err; 1295 } 1296 1297 if (nlk->portid) { 1298 if (nladdr->nl_pid != nlk->portid) 1299 return -EINVAL; 1300 } else { 1301 err = nladdr->nl_pid ? 
netlink_insert(sk, net, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(nladdr->nl_groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	if (nlk->netlink_bind && nlk->groups[0]) {
		int i;

		for (i = 0; i < nlk->ngroups; i++) {
			if (test_bit(i, nlk->groups))
				nlk->netlink_bind(i);
		}
	}

	return 0;
}

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (alen < sizeof(addr->sa_family))
		return -EINVAL;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state	= NETLINK_UNCONNECTED;
		nlk->dst_portid	= 0;
		nlk->dst_group	= 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
		return -EPERM;

	if (!nlk->portid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state	= NETLINK_CONNECTED;
		nlk->dst_portid = nladdr->nl_pid;
		nlk->dst_group  = ffs(nladdr->nl_groups);
	}

	return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_portid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->portid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_portid != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
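 * Callers normally pass the sending socket's sndtimeo in *timeo; see
 * netlink_unicast().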
1428 * Return values: 1429 * < 0: error. skb freed, reference to sock dropped. 1430 * 0: continue 1431 * 1: repeat lookup - reference dropped while waiting for socket memory. 1432 */ 1433 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 1434 long *timeo, struct sock *ssk) 1435 { 1436 struct netlink_sock *nlk; 1437 1438 nlk = nlk_sk(sk); 1439 1440 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1441 test_bit(NETLINK_CONGESTED, &nlk->state)) && 1442 !netlink_skb_is_mmaped(skb)) { 1443 DECLARE_WAITQUEUE(wait, current); 1444 if (!*timeo) { 1445 if (!ssk || netlink_is_kernel(ssk)) 1446 netlink_overrun(sk); 1447 sock_put(sk); 1448 kfree_skb(skb); 1449 return -EAGAIN; 1450 } 1451 1452 __set_current_state(TASK_INTERRUPTIBLE); 1453 add_wait_queue(&nlk->wait, &wait); 1454 1455 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1456 test_bit(NETLINK_CONGESTED, &nlk->state)) && 1457 !sock_flag(sk, SOCK_DEAD)) 1458 *timeo = schedule_timeout(*timeo); 1459 1460 __set_current_state(TASK_RUNNING); 1461 remove_wait_queue(&nlk->wait, &wait); 1462 sock_put(sk); 1463 1464 if (signal_pending(current)) { 1465 kfree_skb(skb); 1466 return sock_intr_errno(*timeo); 1467 } 1468 return 1; 1469 } 1470 netlink_skb_set_owner_r(skb, sk); 1471 return 0; 1472 } 1473 1474 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1475 { 1476 int len = skb->len; 1477 1478 #ifdef CONFIG_NETLINK_MMAP 1479 if (netlink_skb_is_mmaped(skb)) 1480 netlink_queue_mmaped_skb(sk, skb); 1481 else if (netlink_rx_is_mmaped(sk)) 1482 netlink_ring_set_copied(sk, skb); 1483 else 1484 #endif /* CONFIG_NETLINK_MMAP */ 1485 skb_queue_tail(&sk->sk_receive_queue, skb); 1486 sk->sk_data_ready(sk, len); 1487 return len; 1488 } 1489 1490 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1491 { 1492 int len = __netlink_sendskb(sk, skb); 1493 1494 sock_put(sk); 1495 return len; 1496 } 1497 1498 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 1499 { 1500 kfree_skb(skb); 1501 sock_put(sk); 1502 } 1503 1504 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) 1505 { 1506 int delta; 1507 1508 WARN_ON(skb->sk != NULL); 1509 if (netlink_skb_is_mmaped(skb)) 1510 return skb; 1511 1512 delta = skb->end - skb->tail; 1513 if (delta * 2 < skb->truesize) 1514 return skb; 1515 1516 if (skb_shared(skb)) { 1517 struct sk_buff *nskb = skb_clone(skb, allocation); 1518 if (!nskb) 1519 return skb; 1520 consume_skb(skb); 1521 skb = nskb; 1522 } 1523 1524 if (!pskb_expand_head(skb, 0, -delta, allocation)) 1525 skb->truesize -= delta; 1526 1527 return skb; 1528 } 1529 1530 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 1531 struct sock *ssk) 1532 { 1533 int ret; 1534 struct netlink_sock *nlk = nlk_sk(sk); 1535 1536 ret = -ECONNREFUSED; 1537 if (nlk->netlink_rcv != NULL) { 1538 ret = skb->len; 1539 netlink_skb_set_owner_r(skb, sk); 1540 NETLINK_CB(skb).sk = ssk; 1541 nlk->netlink_rcv(skb); 1542 consume_skb(skb); 1543 } else { 1544 kfree_skb(skb); 1545 } 1546 sock_put(sk); 1547 return ret; 1548 } 1549 1550 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 1551 u32 portid, int nonblock) 1552 { 1553 struct sock *sk; 1554 int err; 1555 long timeo; 1556 1557 skb = netlink_trim(skb, gfp_any()); 1558 1559 timeo = sock_sndtimeo(ssk, nonblock); 1560 retry: 1561 sk = netlink_getsockbyportid(ssk, portid); 1562 if (IS_ERR(sk)) { 1563 kfree_skb(skb); 1564 return PTR_ERR(sk); 1565 } 1566 if (netlink_is_kernel(sk)) 1567 return netlink_unicast_kernel(sk, skb, ssk); 1568 1569 if 
(sk_filter(sk, skb)) { 1570 err = skb->len; 1571 kfree_skb(skb); 1572 sock_put(sk); 1573 return err; 1574 } 1575 1576 err = netlink_attachskb(sk, skb, &timeo, ssk); 1577 if (err == 1) 1578 goto retry; 1579 if (err) 1580 return err; 1581 1582 return netlink_sendskb(sk, skb); 1583 } 1584 EXPORT_SYMBOL(netlink_unicast); 1585 1586 struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, 1587 u32 dst_portid, gfp_t gfp_mask) 1588 { 1589 #ifdef CONFIG_NETLINK_MMAP 1590 struct sock *sk = NULL; 1591 struct sk_buff *skb; 1592 struct netlink_ring *ring; 1593 struct nl_mmap_hdr *hdr; 1594 unsigned int maxlen; 1595 1596 sk = netlink_getsockbyportid(ssk, dst_portid); 1597 if (IS_ERR(sk)) 1598 goto out; 1599 1600 ring = &nlk_sk(sk)->rx_ring; 1601 /* fast-path without atomic ops for common case: non-mmaped receiver */ 1602 if (ring->pg_vec == NULL) 1603 goto out_put; 1604 1605 skb = alloc_skb_head(gfp_mask); 1606 if (skb == NULL) 1607 goto err1; 1608 1609 spin_lock_bh(&sk->sk_receive_queue.lock); 1610 /* check again under lock */ 1611 if (ring->pg_vec == NULL) 1612 goto out_free; 1613 1614 maxlen = ring->frame_size - NL_MMAP_HDRLEN; 1615 if (maxlen < size) 1616 goto out_free; 1617 1618 netlink_forward_ring(ring); 1619 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); 1620 if (hdr == NULL) 1621 goto err2; 1622 netlink_ring_setup_skb(skb, sk, ring, hdr); 1623 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); 1624 atomic_inc(&ring->pending); 1625 netlink_increment_head(ring); 1626 1627 spin_unlock_bh(&sk->sk_receive_queue.lock); 1628 return skb; 1629 1630 err2: 1631 kfree_skb(skb); 1632 spin_unlock_bh(&sk->sk_receive_queue.lock); 1633 netlink_overrun(sk); 1634 err1: 1635 sock_put(sk); 1636 return NULL; 1637 1638 out_free: 1639 kfree_skb(skb); 1640 spin_unlock_bh(&sk->sk_receive_queue.lock); 1641 out_put: 1642 sock_put(sk); 1643 out: 1644 #endif 1645 return alloc_skb(size, gfp_mask); 1646 } 1647 EXPORT_SYMBOL_GPL(netlink_alloc_skb); 1648 1649 int netlink_has_listeners(struct sock *sk, unsigned int group) 1650 { 1651 int res = 0; 1652 struct listeners *listeners; 1653 1654 BUG_ON(!netlink_is_kernel(sk)); 1655 1656 rcu_read_lock(); 1657 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 1658 1659 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) 1660 res = test_bit(group - 1, listeners->masks); 1661 1662 rcu_read_unlock(); 1663 1664 return res; 1665 } 1666 EXPORT_SYMBOL_GPL(netlink_has_listeners); 1667 1668 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1669 { 1670 struct netlink_sock *nlk = nlk_sk(sk); 1671 1672 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1673 !test_bit(NETLINK_CONGESTED, &nlk->state)) { 1674 netlink_skb_set_owner_r(skb, sk); 1675 __netlink_sendskb(sk, skb); 1676 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1677 } 1678 return -1; 1679 } 1680 1681 struct netlink_broadcast_data { 1682 struct sock *exclude_sk; 1683 struct net *net; 1684 u32 portid; 1685 u32 group; 1686 int failure; 1687 int delivery_failure; 1688 int congested; 1689 int delivered; 1690 gfp_t allocation; 1691 struct sk_buff *skb, *skb2; 1692 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); 1693 void *tx_data; 1694 }; 1695 1696 static int do_one_broadcast(struct sock *sk, 1697 struct netlink_broadcast_data *p) 1698 { 1699 struct netlink_sock *nlk = nlk_sk(sk); 1700 int val; 1701 1702 if (p->exclude_sk == sk) 1703 goto out; 1704 1705 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1706 
!test_bit(p->group - 1, nlk->groups)) 1707 goto out; 1708 1709 if (!net_eq(sock_net(sk), p->net)) 1710 goto out; 1711 1712 if (p->failure) { 1713 netlink_overrun(sk); 1714 goto out; 1715 } 1716 1717 sock_hold(sk); 1718 if (p->skb2 == NULL) { 1719 if (skb_shared(p->skb)) { 1720 p->skb2 = skb_clone(p->skb, p->allocation); 1721 } else { 1722 p->skb2 = skb_get(p->skb); 1723 /* 1724 * skb ownership may have been set when 1725 * delivered to a previous socket. 1726 */ 1727 skb_orphan(p->skb2); 1728 } 1729 } 1730 if (p->skb2 == NULL) { 1731 netlink_overrun(sk); 1732 /* Clone failed. Notify ALL listeners. */ 1733 p->failure = 1; 1734 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1735 p->delivery_failure = 1; 1736 } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { 1737 kfree_skb(p->skb2); 1738 p->skb2 = NULL; 1739 } else if (sk_filter(sk, p->skb2)) { 1740 kfree_skb(p->skb2); 1741 p->skb2 = NULL; 1742 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1743 netlink_overrun(sk); 1744 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1745 p->delivery_failure = 1; 1746 } else { 1747 p->congested |= val; 1748 p->delivered = 1; 1749 p->skb2 = NULL; 1750 } 1751 sock_put(sk); 1752 1753 out: 1754 return 0; 1755 } 1756 1757 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, 1758 u32 group, gfp_t allocation, 1759 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), 1760 void *filter_data) 1761 { 1762 struct net *net = sock_net(ssk); 1763 struct netlink_broadcast_data info; 1764 struct sock *sk; 1765 1766 skb = netlink_trim(skb, allocation); 1767 1768 info.exclude_sk = ssk; 1769 info.net = net; 1770 info.portid = portid; 1771 info.group = group; 1772 info.failure = 0; 1773 info.delivery_failure = 0; 1774 info.congested = 0; 1775 info.delivered = 0; 1776 info.allocation = allocation; 1777 info.skb = skb; 1778 info.skb2 = NULL; 1779 info.tx_filter = filter; 1780 info.tx_data = filter_data; 1781 1782 /* While we sleep in clone, do not allow to change socket list */ 1783 1784 netlink_lock_table(); 1785 1786 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1787 do_one_broadcast(sk, &info); 1788 1789 consume_skb(skb); 1790 1791 netlink_unlock_table(); 1792 1793 if (info.delivery_failure) { 1794 kfree_skb(info.skb2); 1795 return -ENOBUFS; 1796 } 1797 consume_skb(info.skb2); 1798 1799 if (info.delivered) { 1800 if (info.congested && (allocation & __GFP_WAIT)) 1801 yield(); 1802 return 0; 1803 } 1804 return -ESRCH; 1805 } 1806 EXPORT_SYMBOL(netlink_broadcast_filtered); 1807 1808 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, 1809 u32 group, gfp_t allocation) 1810 { 1811 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, 1812 NULL, NULL); 1813 } 1814 EXPORT_SYMBOL(netlink_broadcast); 1815 1816 struct netlink_set_err_data { 1817 struct sock *exclude_sk; 1818 u32 portid; 1819 u32 group; 1820 int code; 1821 }; 1822 1823 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) 1824 { 1825 struct netlink_sock *nlk = nlk_sk(sk); 1826 int ret = 0; 1827 1828 if (sk == p->exclude_sk) 1829 goto out; 1830 1831 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) 1832 goto out; 1833 1834 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1835 !test_bit(p->group - 1, nlk->groups)) 1836 goto out; 1837 1838 if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { 1839 ret = 1; 1840 goto out; 1841 } 1842 1843 sk->sk_err = p->code; 1844 sk->sk_error_report(sk); 1845 out: 
	return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct sock *sk;
	int ret = 0;

	info.exclude_sk = ssk;
	info.portid = portid;
	info.group = group;
	/* sk->sk_err wants a positive error value */
	info.code = -code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
		ret += do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
	return ret;
}
EXPORT_SYMBOL(netlink_set_err);

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
				     unsigned int group,
				     int is_new)
{
	int old, new = !!is_new, subscriptions;

	old = test_bit(group - 1, nlk->groups);
	subscriptions = nlk->subscriptions - old + new;
	if (new)
		__set_bit(group - 1, nlk->groups);
	else
		__clear_bit(group - 1, nlk->groups);
	netlink_update_subscriptions(&nlk->sk, subscriptions);
	netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int val = 0;
	int err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
	    optlen >= sizeof(int) &&
	    get_user(val, (unsigned int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		netlink_update_socket_mc(nlk, val,
					 optname == NETLINK_ADD_MEMBERSHIP);
		netlink_table_ungrab();

		if (nlk->netlink_bind)
			nlk->netlink_bind(val);

		err = 0;
		break;
	}
	case NETLINK_BROADCAST_ERROR:
		if (val)
			nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
		else
			nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
		err = 0;
		break;
	case NETLINK_NO_ENOBUFS:
		if (val) {
			nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
			clear_bit(NETLINK_CONGESTED, &nlk->state);
			wake_up_interruptible(&nlk->wait);
		} else {
			nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
		}
		err = 0;
		break;
#ifdef CONFIG_NETLINK_MMAP
	case NETLINK_RX_RING:
	case NETLINK_TX_RING: {
		struct nl_mmap_req req;

		/* Rings might consume more memory than queue limits, require
		 * CAP_NET_ADMIN.
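		 * (The ring geometry passed in struct nl_mmap_req is further
		 * validated by netlink_set_ring().)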
1966 */ 1967 if (!capable(CAP_NET_ADMIN)) 1968 return -EPERM; 1969 if (optlen < sizeof(req)) 1970 return -EINVAL; 1971 if (copy_from_user(&req, optval, sizeof(req))) 1972 return -EFAULT; 1973 err = netlink_set_ring(sk, &req, false, 1974 optname == NETLINK_TX_RING); 1975 break; 1976 } 1977 #endif /* CONFIG_NETLINK_MMAP */ 1978 default: 1979 err = -ENOPROTOOPT; 1980 } 1981 return err; 1982 } 1983 1984 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1985 char __user *optval, int __user *optlen) 1986 { 1987 struct sock *sk = sock->sk; 1988 struct netlink_sock *nlk = nlk_sk(sk); 1989 int len, val, err; 1990 1991 if (level != SOL_NETLINK) 1992 return -ENOPROTOOPT; 1993 1994 if (get_user(len, optlen)) 1995 return -EFAULT; 1996 if (len < 0) 1997 return -EINVAL; 1998 1999 switch (optname) { 2000 case NETLINK_PKTINFO: 2001 if (len < sizeof(int)) 2002 return -EINVAL; 2003 len = sizeof(int); 2004 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; 2005 if (put_user(len, optlen) || 2006 put_user(val, optval)) 2007 return -EFAULT; 2008 err = 0; 2009 break; 2010 case NETLINK_BROADCAST_ERROR: 2011 if (len < sizeof(int)) 2012 return -EINVAL; 2013 len = sizeof(int); 2014 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; 2015 if (put_user(len, optlen) || 2016 put_user(val, optval)) 2017 return -EFAULT; 2018 err = 0; 2019 break; 2020 case NETLINK_NO_ENOBUFS: 2021 if (len < sizeof(int)) 2022 return -EINVAL; 2023 len = sizeof(int); 2024 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; 2025 if (put_user(len, optlen) || 2026 put_user(val, optval)) 2027 return -EFAULT; 2028 err = 0; 2029 break; 2030 default: 2031 err = -ENOPROTOOPT; 2032 } 2033 return err; 2034 } 2035 2036 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 2037 { 2038 struct nl_pktinfo info; 2039 2040 info.group = NETLINK_CB(skb).dst_group; 2041 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 2042 } 2043 2044 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, 2045 struct msghdr *msg, size_t len) 2046 { 2047 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 2048 struct sock *sk = sock->sk; 2049 struct netlink_sock *nlk = nlk_sk(sk); 2050 struct sockaddr_nl *addr = msg->msg_name; 2051 u32 dst_portid; 2052 u32 dst_group; 2053 struct sk_buff *skb; 2054 int err; 2055 struct scm_cookie scm; 2056 2057 if (msg->msg_flags&MSG_OOB) 2058 return -EOPNOTSUPP; 2059 2060 if (NULL == siocb->scm) 2061 siocb->scm = &scm; 2062 2063 err = scm_send(sock, msg, siocb->scm, true); 2064 if (err < 0) 2065 return err; 2066 2067 if (msg->msg_namelen) { 2068 err = -EINVAL; 2069 if (addr->nl_family != AF_NETLINK) 2070 goto out; 2071 dst_portid = addr->nl_pid; 2072 dst_group = ffs(addr->nl_groups); 2073 err = -EPERM; 2074 if ((dst_group || dst_portid) && 2075 !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) 2076 goto out; 2077 } else { 2078 dst_portid = nlk->dst_portid; 2079 dst_group = nlk->dst_group; 2080 } 2081 2082 if (!nlk->portid) { 2083 err = netlink_autobind(sock); 2084 if (err) 2085 goto out; 2086 } 2087 2088 if (netlink_tx_is_mmaped(sk) && 2089 msg->msg_iov->iov_base == NULL) { 2090 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, 2091 siocb); 2092 goto out; 2093 } 2094 2095 err = -EMSGSIZE; 2096 if (len > sk->sk_sndbuf - 32) 2097 goto out; 2098 err = -ENOBUFS; 2099 skb = alloc_skb(len, GFP_KERNEL); 2100 if (skb == NULL) 2101 goto out; 2102 2103 NETLINK_CB(skb).portid = nlk->portid; 2104 NETLINK_CB(skb).dst_group = dst_group; 2105 NETLINK_CB(skb).creds = 
siocb->scm->creds; 2106 2107 err = -EFAULT; 2108 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 2109 kfree_skb(skb); 2110 goto out; 2111 } 2112 2113 err = security_netlink_send(sk, skb); 2114 if (err) { 2115 kfree_skb(skb); 2116 goto out; 2117 } 2118 2119 if (dst_group) { 2120 atomic_inc(&skb->users); 2121 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 2122 } 2123 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); 2124 2125 out: 2126 scm_destroy(siocb->scm); 2127 return err; 2128 } 2129 2130 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, 2131 struct msghdr *msg, size_t len, 2132 int flags) 2133 { 2134 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 2135 struct scm_cookie scm; 2136 struct sock *sk = sock->sk; 2137 struct netlink_sock *nlk = nlk_sk(sk); 2138 int noblock = flags&MSG_DONTWAIT; 2139 size_t copied; 2140 struct sk_buff *skb, *data_skb; 2141 int err, ret; 2142 2143 if (flags&MSG_OOB) 2144 return -EOPNOTSUPP; 2145 2146 copied = 0; 2147 2148 skb = skb_recv_datagram(sk, flags, noblock, &err); 2149 if (skb == NULL) 2150 goto out; 2151 2152 data_skb = skb; 2153 2154 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 2155 if (unlikely(skb_shinfo(skb)->frag_list)) { 2156 /* 2157 * If this skb has a frag_list, then here that means that we 2158 * will have to use the frag_list skb's data for compat tasks 2159 * and the regular skb's data for normal (non-compat) tasks. 2160 * 2161 * If we need to send the compat skb, assign it to the 2162 * 'data_skb' variable so that it will be used below for data 2163 * copying. We keep 'skb' for everything else, including 2164 * freeing both later. 2165 */ 2166 if (flags & MSG_CMSG_COMPAT) 2167 data_skb = skb_shinfo(skb)->frag_list; 2168 } 2169 #endif 2170 2171 msg->msg_namelen = 0; 2172 2173 copied = data_skb->len; 2174 if (len < copied) { 2175 msg->msg_flags |= MSG_TRUNC; 2176 copied = len; 2177 } 2178 2179 skb_reset_transport_header(data_skb); 2180 err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); 2181 2182 if (msg->msg_name) { 2183 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; 2184 addr->nl_family = AF_NETLINK; 2185 addr->nl_pad = 0; 2186 addr->nl_pid = NETLINK_CB(skb).portid; 2187 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 2188 msg->msg_namelen = sizeof(*addr); 2189 } 2190 2191 if (nlk->flags & NETLINK_RECV_PKTINFO) 2192 netlink_cmsg_recv_pktinfo(msg, skb); 2193 2194 if (NULL == siocb->scm) { 2195 memset(&scm, 0, sizeof(scm)); 2196 siocb->scm = &scm; 2197 } 2198 siocb->scm->creds = *NETLINK_CREDS(skb); 2199 if (flags & MSG_TRUNC) 2200 copied = data_skb->len; 2201 2202 skb_free_datagram(sk, skb); 2203 2204 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 2205 ret = netlink_dump(sk); 2206 if (ret) { 2207 sk->sk_err = ret; 2208 sk->sk_error_report(sk); 2209 } 2210 } 2211 2212 scm_recv(sock, msg, siocb->scm, flags); 2213 out: 2214 netlink_rcv_wake(sk); 2215 return err ? : copied; 2216 } 2217 2218 static void netlink_data_ready(struct sock *sk, int len) 2219 { 2220 BUG(); 2221 } 2222 2223 /* 2224 * We export these functions to other modules. They provide a 2225 * complete set of kernel non-blocking support for message 2226 * queueing. 
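 * __netlink_kernel_create() below is the entry point protocol families use
 * to register their kernel-side netlink socket.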

/*
 * We export these functions to other modules. They provide a
 * complete set of kernel non-blocking support for message
 * queueing.
 */

struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
			struct netlink_kernel_cfg *cfg)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	struct listeners *listeners = NULL;
	struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
	unsigned int groups;

	BUG_ON(!nl_table);

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	/*
	 * We only need the sk to hold a reference on the net, but we must
	 * not get_net() it. Besides, we cannot get and then put the net
	 * here. So we create the socket inside init_net and then move it
	 * to net.
	 */

	if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
		goto out_sock_release_nosk;

	sk = sock->sk;
	sk_change_net(sk, net);

	if (!cfg || cfg->groups < 32)
		groups = 32;
	else
		groups = cfg->groups;

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk->sk_data_ready = netlink_data_ready;
	if (cfg && cfg->input)
		nlk_sk(sk)->netlink_rcv = cfg->input;

	if (netlink_insert(sk, net, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	netlink_table_grab();
	if (!nl_table[unit].registered) {
		nl_table[unit].groups = groups;
		rcu_assign_pointer(nl_table[unit].listeners, listeners);
		nl_table[unit].cb_mutex = cb_mutex;
		nl_table[unit].module = module;
		if (cfg) {
			nl_table[unit].bind = cfg->bind;
			nl_table[unit].flags = cfg->flags;
		}
		nl_table[unit].registered = 1;
	} else {
		kfree(listeners);
		nl_table[unit].registered++;
	}
	netlink_table_ungrab();
	return sk;

out_sock_release:
	kfree(listeners);
	netlink_kernel_release(sk);
	return NULL;

out_sock_release_nosk:
	sock_release(sock);
	return NULL;
}
EXPORT_SYMBOL(__netlink_kernel_create);

void
netlink_kernel_release(struct sock *sk)
{
	sk_release_kernel(sk);
}
EXPORT_SYMBOL(netlink_kernel_release);

int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	struct listeners *new, *old;
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];

	if (groups < 32)
		groups = 32;

	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
		new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
		if (!new)
			return -ENOMEM;
		old = nl_deref_protected(tbl->listeners);
		memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
		rcu_assign_pointer(tbl->listeners, new);

		kfree_rcu(old, rcu);
	}
	tbl->groups = groups;

	return 0;
}
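
/*
 * Illustrative kernel-side sketch (not part of this file, compiled out with
 * "#if 0"): how a subsystem might create its kernel socket through the
 * netlink_kernel_create() wrapper around __netlink_kernel_create() above.
 * NETLINK_USERSOCK is reused here only to have a valid unit number;
 * example_nl_sk and example_nl_rcv() are made-up names for the example.
 */
#if 0
static struct sock *example_nl_sk;

static void example_nl_rcv(struct sk_buff *skb)
{
	/* Invoked for messages unicast to the kernel socket; a real
	 * handler would typically pass the skb to netlink_rcv_skb().
	 */
	pr_info("example: received %u bytes from portid %u\n",
		skb->len, NETLINK_CB(skb).portid);
}

static int __init example_nl_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.groups = 32,
		.input	= example_nl_rcv,
	};

	example_nl_sk = netlink_kernel_create(&init_net, NETLINK_USERSOCK, &cfg);
	return example_nl_sk ? 0 : -ENOMEM;
}

static void __exit example_nl_exit(void)
{
	netlink_kernel_release(example_nl_sk);
}
#endif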

/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	int err;

	netlink_table_grab();
	err = __netlink_change_ngroups(sk, groups);
	netlink_table_ungrab();

	return err;
}

void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	struct sock *sk;
	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

	sk_for_each_bound(sk, &tbl->mc_list)
		netlink_update_socket_mc(nlk_sk(sk), group, 0);
}

/**
 * netlink_clear_multicast_users - remove all listeners from a multicast group
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	netlink_table_grab();
	__netlink_clear_multicast_users(ksk, group);
	netlink_table_ungrab();
}

struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
{
	struct nlmsghdr *nlh;
	int size = nlmsg_msg_size(len);

	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
	nlh->nlmsg_type = type;
	nlh->nlmsg_len = size;
	nlh->nlmsg_flags = flags;
	nlh->nlmsg_pid = portid;
	nlh->nlmsg_seq = seq;
	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
		memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
	return nlh;
}
EXPORT_SYMBOL(__nlmsg_put);

/*
 * It looks a bit ugly.
 * It would be better to create a kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	int len, err = -ENOBUFS;
	int alloc_size;

	mutex_lock(nlk->cb_mutex);

	cb = nlk->cb;
	if (cb == NULL) {
		err = -EINVAL;
		goto errout_skb;
	}

	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

	if (!netlink_rx_is_mmaped(sk) &&
	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto errout_skb;
	skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
	if (!skb)
		goto errout_skb;
	netlink_skb_set_owner_r(skb, sk);

	len = cb->dump(skb, cb);

	if (len > 0) {
		mutex_unlock(nlk->cb_mutex);

		if (sk_filter(sk, skb))
			kfree_skb(skb);
		else
			__netlink_sendskb(sk, skb);
		return 0;
	}

	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
	if (!nlh)
		goto errout_skb;

	nl_dump_check_consistent(cb, nlh);

	memcpy(nlmsg_data(nlh), &len, sizeof(len));

	if (sk_filter(sk, skb))
		kfree_skb(skb);
	else
		__netlink_sendskb(sk, skb);

	if (cb->done)
		cb->done(cb);
	nlk->cb = NULL;
	mutex_unlock(nlk->cb_mutex);

	module_put(cb->module);
	netlink_consume_callback(cb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
	return err;
}
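
/*
 * Illustrative sketch (not part of this file, compiled out with "#if 0") of
 * the dump callback contract that netlink_dump() above relies on: stash the
 * resume point in cb->args[], return a positive value while there is more
 * data, and return 0 once nothing was added so NLMSG_DONE gets sent.
 * EXAMPLE_MSG_TYPE, EXAMPLE_ATTR_ID, EXAMPLE_TABLE_SIZE and example_table[]
 * are made-up names for the example.
 */
#if 0
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, start = cb->args[0];

	for (idx = start; idx < EXAMPLE_TABLE_SIZE; idx++) {
		struct nlmsghdr *nlh;

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, EXAMPLE_MSG_TYPE,
				0, NLM_F_MULTI);
		if (!nlh)
			break;		/* skb full: resume from idx next time */
		if (nla_put_u32(skb, EXAMPLE_ATTR_ID, example_table[idx])) {
			nlmsg_cancel(skb, nlh);
			break;
		}
		nlmsg_end(skb, nlh);
	}

	cb->args[0] = idx;
	/* Non-zero: more to come, netlink_dump() will call us again. */
	return skb->len;
}
#endif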

int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
			 const struct nlmsghdr *nlh,
			 struct netlink_dump_control *control)
{
	struct netlink_callback *cb;
	struct sock *sk;
	struct netlink_sock *nlk;
	int ret;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	/* Memory mapped dump requests need to be copied to avoid looping
	 * on the pending state in netlink_mmap_sendmsg() while the CB holds
	 * a reference to the skb.
	 */
	if (netlink_skb_is_mmaped(skb)) {
		skb = skb_copy(skb, GFP_KERNEL);
		if (skb == NULL) {
			kfree(cb);
			return -ENOBUFS;
		}
	} else
		atomic_inc(&skb->users);

	cb->dump = control->dump;
	cb->done = control->done;
	cb->nlh = nlh;
	cb->data = control->data;
	cb->module = control->module;
	cb->min_dump_alloc = control->min_dump_alloc;
	cb->skb = skb;

	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);

	mutex_lock(nlk->cb_mutex);
	/* A dump is in progress... */
	if (nlk->cb) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		ret = -EBUSY;
		goto out;
	}
	/* Take a reference on the module that cb->dump belongs to */
	if (!try_module_get(cb->module)) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		ret = -EPROTONOSUPPORT;
		goto out;
	}

	nlk->cb = cb;
	mutex_unlock(nlk->cb_mutex);

	ret = netlink_dump(sk);
out:
	sock_put(sk);

	if (ret)
		return ret;

	/* We successfully started a dump; by returning -EINTR we
	 * signal the caller not to send an ACK even if one was requested.
	 */
	return -EINTR;
}
EXPORT_SYMBOL(__netlink_dump_start);
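
/*
 * Illustrative sketch (not part of this file, compiled out with "#if 0") of
 * how a message handler typically hands NLM_F_DUMP requests to
 * __netlink_dump_start() via the netlink_dump_start() wrapper, propagating
 * its -EINTR "dump started, do not ACK" convention back to
 * netlink_rcv_skb().  example_dump() is the hypothetical callback sketched
 * earlier and example_nl_sk is the assumed kernel socket from the earlier
 * creation sketch.
 */
#if 0
static int example_doit_or_dump(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = example_dump,
		};

		/* Returns -EINTR on success so no ACK is generated. */
		return netlink_dump_start(example_nl_sk, skb, nlh, &c);
	}

	/* Non-dump request: handle it directly and let the caller ACK. */
	return 0;
}
#endif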

void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	size_t payload = sizeof(*errmsg);

	/* Error messages get the original request appended. */
	if (err)
		payload += nlmsg_len(nlh);

	skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
				NETLINK_CB(in_skb).portid, GFP_KERNEL);
	if (!skb) {
		struct sock *sk;

		sk = netlink_lookup(sock_net(in_skb->sk),
				    in_skb->sk->sk_protocol,
				    NETLINK_CB(in_skb).portid);
		if (sk) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
			sock_put(sk);
		}
		return;
	}

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			  NLMSG_ERROR, payload, 0);
	errmsg = nlmsg_data(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);

int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *))
{
	struct nlmsghdr *nlh;
	int err;

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
			goto ack;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto ack;

		err = cb(skb, nlh);
		if (err == -EINTR)
			goto skip;

ack:
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err);

skip:
		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @portid: destination netlink portid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_portid = 0;

		if (report) {
			atomic_inc(&skb->users);
			exclude_portid = portid;
		}

		/* errors reported via destination sk->sk_err, but propagate
		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
		err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
	}

	if (report) {
		int err2;

		err2 = nlmsg_unicast(sk, skb, portid);
		if (!err || err == -ESRCH)
			err = err2;
	}

	return err;
}
EXPORT_SYMBOL(nlmsg_notify);
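
/*
 * Illustrative sketch (not part of this file, compiled out with "#if 0") of
 * a caller of nlmsg_notify() above: build a message, multicast it to a
 * group, and optionally unicast a copy back to the requester when it asked
 * for a report.  EXAMPLE_NOTIFY_TYPE, EXAMPLE_ATTR_ID, EXAMPLE_GROUP and
 * the assumed kernel socket example_nl_sk are placeholders.
 */
#if 0
static int example_notify(u32 requester_portid, bool report)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;

	skb = nlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0, EXAMPLE_NOTIFY_TYPE, 0, 0);
	if (!nlh || nla_put_u32(skb, EXAMPLE_ATTR_ID, 42)) {
		nlmsg_free(skb);
		return -EMSGSIZE;
	}
	nlmsg_end(skb, nlh);

	/* Ownership of skb passes to nlmsg_notify(). */
	return nlmsg_notify(example_nl_sk, skb, requester_portid,
			    EXAMPLE_GROUP, report, GFP_KERNEL);
}
#endif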

#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
	struct seq_net_private p;
	int link;
	int hash_idx;
};

static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
	struct nl_seq_iter *iter = seq->private;
	int i, j;
	struct sock *s;
	loff_t off = 0;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_portid_hash *hash = &nl_table[i].hash;

		for (j = 0; j <= hash->mask; j++) {
			sk_for_each(s, &hash->table[j]) {
				if (sock_net(s) != seq_file_net(seq))
					continue;
				if (off == pos) {
					iter->link = i;
					iter->hash_idx = j;
					return s;
				}
				++off;
			}
		}
	}
	return NULL;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(nl_table_lock)
{
	read_lock(&nl_table_lock);
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *s;
	struct nl_seq_iter *iter;
	int i, j;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return netlink_seq_socket_idx(seq, 0);

	iter = seq->private;
	s = v;
	do {
		s = sk_next(s);
	} while (s && sock_net(s) != seq_file_net(seq));
	if (s)
		return s;

	i = iter->link;
	j = iter->hash_idx + 1;

	do {
		struct nl_portid_hash *hash = &nl_table[i].hash;

		for (; j <= hash->mask; j++) {
			s = sk_head(&hash->table[j]);
			while (s && sock_net(s) != seq_file_net(seq))
				s = sk_next(s);
			if (s) {
				iter->link = i;
				iter->hash_idx = j;
				return s;
			}
		}

		j = 0;
	} while (++i < MAX_LINKS);

	return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
	__releases(nl_table_lock)
{
	read_unlock(&nl_table_lock);
}


static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "sk       Eth Pid    Groups   "
			 "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
	} else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
			   s,
			   s->sk_protocol,
			   nlk->portid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   sk_rmem_alloc_get(s),
			   sk_wmem_alloc_get(s),
			   nlk->cb,
			   atomic_read(&s->sk_refcnt),
			   atomic_read(&s->sk_drops),
			   sock_i_ino(s)
			);

	}
	return 0;
}

static const struct seq_operations netlink_seq_ops = {
	.start = netlink_seq_start,
	.next = netlink_seq_next,
	.stop = netlink_seq_stop,
	.show = netlink_seq_show,
};


static int netlink_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &netlink_seq_ops,
			    sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
	.owner = THIS_MODULE,
	.open = netlink_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
	.family = PF_NETLINK,
	.owner = THIS_MODULE,
	.release = netlink_release,
	.bind = netlink_bind,
	.connect = netlink_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = netlink_getname,
	.poll = netlink_poll,
	.ioctl = sock_no_ioctl,
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.setsockopt = netlink_setsockopt,
	.getsockopt = netlink_getsockopt,
	.sendmsg = netlink_sendmsg,
	.recvmsg = netlink_recvmsg,
	.mmap = netlink_mmap,
	.sendpage = sock_no_sendpage,
};

static const struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner = THIS_MODULE,	/* for consistency 8) */
};
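
/*
 * Illustrative sketch (not part of this file, compiled out with "#if 0") of
 * a client of netlink_register_notifier() above: the netlink_chain fires
 * NETLINK_URELEASE when a socket is released, which lets a subsystem drop
 * per-portid state for peers that went away.  example_cleanup_portid() and
 * EXAMPLE_PROTOCOL are placeholders.
 */
#if 0
static int example_netlink_event(struct notifier_block *nb,
				 unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE && n->protocol == EXAMPLE_PROTOCOL)
		example_cleanup_portid(n->net, n->portid);

	return NOTIFY_DONE;
}

static struct notifier_block example_netlink_notifier = {
	.notifier_call = example_netlink_event,
};

/* Typically registered and unregistered from the subsystem's init/exit paths. */
static int __init example_notifier_init(void)
{
	return netlink_register_notifier(&example_netlink_notifier);
}

static void __exit example_notifier_exit(void)
{
	netlink_unregister_notifier(&example_netlink_notifier);
}
#endif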

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
		return -ENOMEM;
#endif
	return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("netlink", net->proc_net);
#endif
}

static void __init netlink_add_usersock_entry(void)
{
	struct listeners *listeners;
	int groups = 32;

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		panic("netlink_add_usersock_entry: Cannot allocate listeners\n");

	netlink_table_grab();

	nl_table[NETLINK_USERSOCK].groups = groups;
	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
	nl_table[NETLINK_USERSOCK].registered = 1;
	nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;

	netlink_table_ungrab();
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
	int i;
	unsigned long limit;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	if (totalram_pages >= (128 * 1024))
		limit = totalram_pages >> (21 - PAGE_SHIFT);
	else
		limit = totalram_pages >> (23 - PAGE_SHIFT);

	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
	limit = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_portid_hash *hash = &nl_table[i].hash;

		hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			while (i-- > 0)
				nl_portid_hash_free(nl_table[i].hash.table,
						    1 * sizeof(*hash->table));
			kfree(nl_table);
			goto panic;
		}
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	netlink_add_usersock_entry();

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);