// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *				Patrick McHardy <kaber@trash.net>
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *				 use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *				 - inc module use count of module that owns
 *				   the kernel socket in case userspace opens
 *				   socket of same protocol
 *				 - remove all module support, since netlink is
 *				   mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/if_arp.h>
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
#include <linux/btf_ids.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
#define CREATE_TRACE_POINTS
#include <trace/events/netlink.h>

#include "af_netlink.h"

/* Per-protocol listener bitmap: an RCU head (the object is released with
 * kfree_rcu() elsewhere in this file) followed by a flexible array of mask
 * words that netlink_update_listeners() fills with the OR of the group
 * bitmaps of every socket on the protocol's mc_list.
 */
struct listeners {
	struct rcu_head		rcu;
	unsigned long		masks[];
};

/* state bits */
/* Bit index (not a mask) used with test_bit()/set_bit() on nlk->state. */
#define NETLINK_S_CONGESTED		0x0

/* Return non-zero when @sk carries the NETLINK_F_KERNEL_SOCKET flag. */
static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}

/* Table of per-protocol netlink state, indexed by protocol number. */
struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);

/* Wait queue used by netlink_table_grab() and its wakers. */
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

/* One lockdep class per protocol for the default per-socket cb_mutex;
 * applied in __netlink_create() together with the names below.
 */
static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];

/* Lockdep class names, indexed by protocol number (MAX_LINKS + 1 entries). */
static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
	"nlk_cb_mutex-ROUTE",
	"nlk_cb_mutex-1",
	"nlk_cb_mutex-USERSOCK",
	"nlk_cb_mutex-FIREWALL",
	"nlk_cb_mutex-SOCK_DIAG",
	"nlk_cb_mutex-NFLOG",
	"nlk_cb_mutex-XFRM",
	"nlk_cb_mutex-SELINUX",
	"nlk_cb_mutex-ISCSI",
	"nlk_cb_mutex-AUDIT",
	"nlk_cb_mutex-FIB_LOOKUP",
	"nlk_cb_mutex-CONNECTOR",
	"nlk_cb_mutex-NETFILTER",
	"nlk_cb_mutex-IP6_FW",
	"nlk_cb_mutex-DNRTMSG",
	"nlk_cb_mutex-KOBJECT_UEVENT",
	"nlk_cb_mutex-GENERIC",
	"nlk_cb_mutex-17",
	"nlk_cb_mutex-SCSITRANSPORT",
	"nlk_cb_mutex-ECRYPTFS",
	"nlk_cb_mutex-RDMA",
	"nlk_cb_mutex-CRYPTO",
	"nlk_cb_mutex-SMC",
	"nlk_cb_mutex-23",
	"nlk_cb_mutex-24",
	"nlk_cb_mutex-25",
	"nlk_cb_mutex-26",
	"nlk_cb_mutex-27",
	"nlk_cb_mutex-28",
	"nlk_cb_mutex-29",
	"nlk_cb_mutex-30",
	"nlk_cb_mutex-31",
	"nlk_cb_mutex-MAX_LINKS"
};

static int netlink_dump(struct sock *sk);

/* nl_table locking explained:
 * Lookup and traversal are protected with an RCU read-side lock.
Insertion 137 * and removal are protected with per bucket lock while using RCU list 138 * modification primitives and may run in parallel to RCU protected lookups. 139 * Destruction of the Netlink socket may only occur *after* nl_table_lock has 140 * been acquired * either during or after the socket has been removed from 141 * the list and after an RCU grace period. 142 */ 143 DEFINE_RWLOCK(nl_table_lock); 144 EXPORT_SYMBOL_GPL(nl_table_lock); 145 static atomic_t nl_table_users = ATOMIC_INIT(0); 146 147 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 148 149 static BLOCKING_NOTIFIER_HEAD(netlink_chain); 150 151 152 static const struct rhashtable_params netlink_rhashtable_params; 153 154 void do_trace_netlink_extack(const char *msg) 155 { 156 trace_netlink_extack(msg); 157 } 158 EXPORT_SYMBOL(do_trace_netlink_extack); 159 160 static inline u32 netlink_group_mask(u32 group) 161 { 162 if (group > 32) 163 return 0; 164 return group ? 1 << (group - 1) : 0; 165 } 166 167 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, 168 gfp_t gfp_mask) 169 { 170 unsigned int len = skb_end_offset(skb); 171 struct sk_buff *new; 172 173 new = alloc_skb(len, gfp_mask); 174 if (new == NULL) 175 return NULL; 176 177 NETLINK_CB(new).portid = NETLINK_CB(skb).portid; 178 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; 179 NETLINK_CB(new).creds = NETLINK_CB(skb).creds; 180 181 skb_put_data(new, skb->data, len); 182 return new; 183 } 184 185 static unsigned int netlink_tap_net_id; 186 187 struct netlink_tap_net { 188 struct list_head netlink_tap_all; 189 struct mutex netlink_tap_lock; 190 }; 191 192 int netlink_add_tap(struct netlink_tap *nt) 193 { 194 struct net *net = dev_net(nt->dev); 195 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 196 197 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 198 return -EINVAL; 199 200 mutex_lock(&nn->netlink_tap_lock); 201 list_add_rcu(&nt->list, &nn->netlink_tap_all); 
202 mutex_unlock(&nn->netlink_tap_lock); 203 204 __module_get(nt->module); 205 206 return 0; 207 } 208 EXPORT_SYMBOL_GPL(netlink_add_tap); 209 210 static int __netlink_remove_tap(struct netlink_tap *nt) 211 { 212 struct net *net = dev_net(nt->dev); 213 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 214 bool found = false; 215 struct netlink_tap *tmp; 216 217 mutex_lock(&nn->netlink_tap_lock); 218 219 list_for_each_entry(tmp, &nn->netlink_tap_all, list) { 220 if (nt == tmp) { 221 list_del_rcu(&nt->list); 222 found = true; 223 goto out; 224 } 225 } 226 227 pr_warn("__netlink_remove_tap: %p not found\n", nt); 228 out: 229 mutex_unlock(&nn->netlink_tap_lock); 230 231 if (found) 232 module_put(nt->module); 233 234 return found ? 0 : -ENODEV; 235 } 236 237 int netlink_remove_tap(struct netlink_tap *nt) 238 { 239 int ret; 240 241 ret = __netlink_remove_tap(nt); 242 synchronize_net(); 243 244 return ret; 245 } 246 EXPORT_SYMBOL_GPL(netlink_remove_tap); 247 248 static __net_init int netlink_tap_init_net(struct net *net) 249 { 250 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 251 252 INIT_LIST_HEAD(&nn->netlink_tap_all); 253 mutex_init(&nn->netlink_tap_lock); 254 return 0; 255 } 256 257 static struct pernet_operations netlink_tap_net_ops = { 258 .init = netlink_tap_init_net, 259 .id = &netlink_tap_net_id, 260 .size = sizeof(struct netlink_tap_net), 261 }; 262 263 static bool netlink_filter_tap(const struct sk_buff *skb) 264 { 265 struct sock *sk = skb->sk; 266 267 /* We take the more conservative approach and 268 * whitelist socket protocols that may pass. 
269 */ 270 switch (sk->sk_protocol) { 271 case NETLINK_ROUTE: 272 case NETLINK_USERSOCK: 273 case NETLINK_SOCK_DIAG: 274 case NETLINK_NFLOG: 275 case NETLINK_XFRM: 276 case NETLINK_FIB_LOOKUP: 277 case NETLINK_NETFILTER: 278 case NETLINK_GENERIC: 279 return true; 280 } 281 282 return false; 283 } 284 285 static int __netlink_deliver_tap_skb(struct sk_buff *skb, 286 struct net_device *dev) 287 { 288 struct sk_buff *nskb; 289 struct sock *sk = skb->sk; 290 int ret = -ENOMEM; 291 292 if (!net_eq(dev_net(dev), sock_net(sk))) 293 return 0; 294 295 dev_hold(dev); 296 297 if (is_vmalloc_addr(skb->head)) 298 nskb = netlink_to_full_skb(skb, GFP_ATOMIC); 299 else 300 nskb = skb_clone(skb, GFP_ATOMIC); 301 if (nskb) { 302 nskb->dev = dev; 303 nskb->protocol = htons((u16) sk->sk_protocol); 304 nskb->pkt_type = netlink_is_kernel(sk) ? 305 PACKET_KERNEL : PACKET_USER; 306 skb_reset_network_header(nskb); 307 ret = dev_queue_xmit(nskb); 308 if (unlikely(ret > 0)) 309 ret = net_xmit_errno(ret); 310 } 311 312 dev_put(dev); 313 return ret; 314 } 315 316 static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn) 317 { 318 int ret; 319 struct netlink_tap *tmp; 320 321 if (!netlink_filter_tap(skb)) 322 return; 323 324 list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) { 325 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 326 if (unlikely(ret)) 327 break; 328 } 329 } 330 331 static void netlink_deliver_tap(struct net *net, struct sk_buff *skb) 332 { 333 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 334 335 rcu_read_lock(); 336 337 if (unlikely(!list_empty(&nn->netlink_tap_all))) 338 __netlink_deliver_tap(skb, nn); 339 340 rcu_read_unlock(); 341 } 342 343 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, 344 struct sk_buff *skb) 345 { 346 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) 347 netlink_deliver_tap(sock_net(dst), skb); 348 } 349 350 static void netlink_overrun(struct sock *sk) 351 { 352 struct 
netlink_sock *nlk = nlk_sk(sk); 353 354 if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { 355 if (!test_and_set_bit(NETLINK_S_CONGESTED, 356 &nlk_sk(sk)->state)) { 357 sk->sk_err = ENOBUFS; 358 sk_error_report(sk); 359 } 360 } 361 atomic_inc(&sk->sk_drops); 362 } 363 364 static void netlink_rcv_wake(struct sock *sk) 365 { 366 struct netlink_sock *nlk = nlk_sk(sk); 367 368 if (skb_queue_empty_lockless(&sk->sk_receive_queue)) 369 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 370 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) 371 wake_up_interruptible(&nlk->wait); 372 } 373 374 static void netlink_skb_destructor(struct sk_buff *skb) 375 { 376 if (is_vmalloc_addr(skb->head)) { 377 if (!skb->cloned || 378 !atomic_dec_return(&(skb_shinfo(skb)->dataref))) 379 vfree(skb->head); 380 381 skb->head = NULL; 382 } 383 if (skb->sk != NULL) 384 sock_rfree(skb); 385 } 386 387 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 388 { 389 WARN_ON(skb->sk != NULL); 390 skb->sk = sk; 391 skb->destructor = netlink_skb_destructor; 392 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 393 sk_mem_charge(sk, skb->truesize); 394 } 395 396 static void netlink_sock_destruct(struct sock *sk) 397 { 398 struct netlink_sock *nlk = nlk_sk(sk); 399 400 if (nlk->cb_running) { 401 if (nlk->cb.done) 402 nlk->cb.done(&nlk->cb); 403 module_put(nlk->cb.module); 404 kfree_skb(nlk->cb.skb); 405 } 406 407 skb_queue_purge(&sk->sk_receive_queue); 408 409 if (!sock_flag(sk, SOCK_DEAD)) { 410 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 411 return; 412 } 413 414 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 415 WARN_ON(refcount_read(&sk->sk_wmem_alloc)); 416 WARN_ON(nlk_sk(sk)->groups); 417 } 418 419 static void netlink_sock_destruct_work(struct work_struct *work) 420 { 421 struct netlink_sock *nlk = container_of(work, struct netlink_sock, 422 work); 423 424 sk_free(&nlk->sk); 425 } 426 427 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 428 * SMP. 
* Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

/* Take nl_table_lock for writing and wait until no netlink_lock_table()
 * user remains.
 *
 * May sleep: while nl_table_users is non-zero the write lock is dropped
 * around schedule() and re-taken afterwards. Returns with the write lock
 * held and interrupts disabled; release with netlink_table_ungrab().
 */
void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	might_sleep();

	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			/* Set the task state before re-checking the counter. */
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

/* Release the write lock taken by netlink_table_grab() and wake waiters. */
void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

/* Register as a table user: nl_table_users is incremented while briefly
 * holding the read lock. Pair with netlink_unlock_table().
 */
static inline void
netlink_lock_table(void)
{
	unsigned long flags;

	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock_irqsave(&nl_table_lock, flags);
	atomic_inc(&nl_table_users);
	read_unlock_irqrestore(&nl_table_lock, flags);
}

/* Drop a table-user count; the last user wakes nl_table_wait. */
static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}

/* Hash lookup key: network namespace plus port id. */
struct netlink_compare_arg
{
	possible_net_t pnet;
	u32 portid;
};

/* Doing sizeof directly may yield 4 extra bytes on 64-bit.
*/
#define netlink_compare_arg_len \
	(offsetof(struct netlink_compare_arg, portid) + sizeof(u32))

/* rhashtable compare callback.
 *
 * Returns 0 when the socket at @ptr has the port id and network namespace
 * stored in the key, non-zero otherwise.
 */
static inline int netlink_compare(struct rhashtable_compare_arg *arg,
				  const void *ptr)
{
	const struct netlink_compare_arg *x = arg->key;
	const struct netlink_sock *nlk = ptr;

	return nlk->portid != x->portid ||
	       !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
}

/* Fill @arg with @net and @portid; the structure is zeroed first so that
 * no stale bytes remain in the key.
 */
static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
				     struct net *net, u32 portid)
{
	memset(arg, 0, sizeof(*arg));
	write_pnet(&arg->pnet, net);
	arg->portid = portid;
}

/* Look up (@net, @portid) in @table's hash. No reference is taken on the
 * result; visible callers hold the RCU read lock around the call.
 */
static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
				     struct net *net)
{
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, net, portid);
	return rhashtable_lookup_fast(&table->hash, &arg,
				      netlink_rhashtable_params);
}

/* Insert @sk into @table's hash, keyed by its namespace and current portid.
 * Returns the result of rhashtable_lookup_insert_key().
 */
static int __netlink_insert(struct netlink_table *table, struct sock *sk)
{
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
	return rhashtable_lookup_insert_key(&table->hash, &arg,
					    &nlk_sk(sk)->node,
					    netlink_rhashtable_params);
}

/* Find the socket bound to @portid for @protocol in @net.
 *
 * Returns the socket with a reference held (caller must sock_put()), or
 * NULL when none is found.
 */
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
	struct netlink_table *table = &nl_table[protocol];
	struct sock *sk;

	rcu_read_lock();
	sk = __netlink_lookup(table, portid, net);
	if (sk)
		sock_hold(sk);
	rcu_read_unlock();

	return sk;
}

static const struct proto_ops netlink_ops;

/* Recompute the listener masks of @sk's protocol from the group bitmaps of
 * all sockets on the protocol's mc_list. Does nothing when the protocol has
 * no listeners object.
 *
 * Note that @sk is reused as the list cursor inside the loop.
 */
static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	unsigned long mask;
	unsigned int i;
	struct listeners *listeners;

	listeners = nl_deref_protected(tbl->listeners);
	if (!listeners)
		return;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, &tbl->mc_list) {
			/* Skip sockets whose bitmap is shorter than word i. */
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		listeners->masks[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}

/* Bind @sk to @portid and publish it in the protocol's hash table.
 *
 * Runs under the socket lock. If the socket is already bound, returns 0
 * when @portid matches the existing port id and -EBUSY otherwise. On a new
 * binding the hash table holds its own reference on the socket. Hash-table
 * errors -EBUSY and -EEXIST are reported as -EOVERFLOW and -EADDRINUSE
 * respectively.
 */
static int netlink_insert(struct sock *sk, u32 portid)
{
	struct netlink_table *table = &nl_table[sk->sk_protocol];
	int err;

	lock_sock(sk);

	err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
	if (nlk_sk(sk)->bound)
		goto err;

	nlk_sk(sk)->portid = portid;
	sock_hold(sk);

	err = __netlink_insert(table, sk);
	if (err) {
		/* In case the hashtable backend returns with -EBUSY
		 * from here, it must not escape to the caller.
		 */
		if (unlikely(err == -EBUSY))
			err = -EOVERFLOW;
		if (err == -EEXIST)
			err = -EADDRINUSE;
		sock_put(sk);
		goto err;
	}

	/* We need to ensure that the socket is hashed and visible. */
	smp_wmb();
	/* Paired with lockless reads from netlink_bind(),
	 * netlink_connect() and netlink_sendmsg().
	 */
	WRITE_ONCE(nlk_sk(sk)->bound, portid);

err:
	release_sock(sk);
	return err;
}

/* Unhash @sk and detach it from multicast bookkeeping.
 *
 * If the socket was in the hash table, the table's reference is dropped.
 * With the table grabbed, a subscribed socket is taken off the bind list and
 * the listener masks are recomputed. For NETLINK_GENERIC sockets
 * genl_sk_destructing_cnt is incremented here; netlink_release() decrements
 * it.
 */
static void netlink_remove(struct sock *sk)
{
	struct netlink_table *table;

	table = &nl_table[sk->sk_protocol];
	if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
				    netlink_rhashtable_params)) {
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}

	netlink_table_grab();
	if (nlk_sk(sk)->subscriptions) {
		__sk_del_bind_node(sk);
		netlink_update_listeners(sk);
	}
	if (sk->sk_protocol == NETLINK_GENERIC)
		atomic_inc(&genl_sk_destructing_cnt);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

/* Allocate and initialise the struct sock behind @sock.
 *
 * @cb_mutex, when non-NULL, becomes the socket's dump mutex; otherwise the
 * socket's own cb_def_mutex is initialised and given the per-protocol
 * lockdep class and name. Returns 0, or -ENOMEM when sk_alloc() fails.
 */
static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol,
			    int kern)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex) {
		nlk->cb_mutex = cb_mutex;
	} else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
		lockdep_set_class_and_name(nlk->cb_mutex,
					   nlk_cb_mutex_keys + protocol,
					   nlk_cb_mutex_key_strings[protocol]);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}

/* Create a netlink socket for @protocol.
 *
 * Only SOCK_RAW and SOCK_DGRAM are accepted (-ESOCKTNOSUPPORT otherwise) and
 * @protocol must be within [0, MAX_LINKS) (-EPROTONOSUPPORT otherwise). With
 * CONFIG_MODULES an unregistered protocol triggers a module request first.
 * A reference on the protocol's owning module is held for the lifetime of
 * the socket; the protocol's cb_mutex, bind and unbind hooks are copied from
 * nl_table while the table is locked.
 *
 * Returns 0 or a negative errno.
 */
static int netlink_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int (*bind)(struct net *net, int group);
	void (*unbind)(struct net *net, int group);
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;
	/* Clamp the index under speculation before it is used on nl_table. */
	protocol = array_index_nospec(protocol, MAX_LINKS);

	netlink_lock_table();
#ifdef CONFIG_MODULES
	if (!nl_table[protocol].registered) {
		/* The table user count is dropped around the module request. */
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	else
		err = -EPROTONOSUPPORT;
	cb_mutex = nl_table[protocol].cb_mutex;
	bind = nl_table[protocol].bind;
	unbind = nl_table[protocol].unbind;
	netlink_unlock_table();

	if (err < 0)
		goto out;

	err = __netlink_create(net, sock, cb_mutex, protocol, kern);
	if (err < 0)
		goto out_module;

	sock_prot_inuse_add(net, &netlink_proto, 1);

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
	nlk->netlink_bind = bind;
	nlk->netlink_unbind = unbind;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}

/* RCU callback queued by netlink_release().
 *
 * Frees the group bitmap and drops one socket reference. When that was the
 * last reference, the socket is freed: directly, or from a work item when a
 * dump is still running and has a done callback (presumably because that
 * callback must not run in RCU callback context; confirm against cb.done
 * implementations).
 */
static void deferred_put_nlk_sk(struct rcu_head *head)
{
	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
	struct sock *sk = &nlk->sk;

	kfree(nlk->groups);
	nlk->groups = NULL;

	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	if (nlk->cb_running && nlk->cb.done) {
		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
		schedule_work(&nlk->work);
		return;
	}

	sk_free(sk);
}

/* Release hook for netlink sockets.
 *
 * Unhashes and orphans the socket, calls the protocol's unbind hook for
 * every group bit set, wakes waiters, purges the write queue, notifies the
 * netlink_chain with NETLINK_URELEASE for a bound socket, drops the module
 * reference and, for a kernel socket, decrements the protocol's registration
 * count (clearing the table entry when it reaches zero). The final socket
 * reference is dropped from an RCU callback. Always returns 0.
 */
static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */

	/* must not acquire netlink_table_lock in any way again before unbind
	 * and notifying genetlink is done as otherwise it might deadlock
	 */
	if (nlk->netlink_unbind) {
		int i;

		/* Bit i of the bitmap corresponds to group number i + 1. */
		for (i = 0; i < nlk->ngroups; i++)
			if (test_bit(i, nlk->groups))
				nlk->netlink_unbind(sock_net(sk), i + 1);
	}
	/* Balances the increment done in netlink_remove(). */
	if (sk->sk_protocol == NETLINK_GENERIC &&
	    atomic_dec_return(&genl_sk_destructing_cnt) == 0)
		wake_up(&genl_sk_destructing_waitq);

	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);

	skb_queue_purge(&sk->sk_write_queue);

	if (nlk->portid && nlk->bound) {
		struct netlink_notify n = {
						.net = sock_net(sk),
						.protocol = sk->sk_protocol,
						.portid = nlk->portid,
					  };
		blocking_notifier_call_chain(&netlink_chain,
				NETLINK_URELEASE, &n);
	}

	module_put(nlk->module);

	if (netlink_is_kernel(sk)) {
		netlink_table_grab();
		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
		if (--nl_table[sk->sk_protocol].registered == 0) {
			struct listeners *old;

			old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
			RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
			kfree_rcu(old, rcu);
			nl_table[sk->sk_protocol].module = NULL;
			nl_table[sk->sk_protocol].bind = NULL;
			nl_table[sk->sk_protocol].unbind = NULL;
			nl_table[sk->sk_protocol].flags = 0;
			nl_table[sk->sk_protocol].registered = 0;
		}
		netlink_table_ungrab();
	}

	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);

	/* Because struct net might disappear soon, do not keep a pointer. */
	if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
		/* Because of deferred_put_nlk_sk and use of work queue,
		 * it is possible netns will be freed before this socket.
		 */
		sock_net_set(sk, &init_net);
		__netns_tracker_alloc(&init_net, &sk->ns_tracker,
				      false, GFP_KERNEL);
	}
	call_rcu(&nlk->rcu, deferred_put_nlk_sk);
	return 0;
}

/* Pick a free port id for @sock and bind to it.
 *
 * The first candidate is the caller's thread-group id. On collision the
 * search moves to negative values, starting at a random point below -4096
 * and counting down. -EADDRINUSE from netlink_insert() restarts the search;
 * -EBUSY (socket already bound) is treated as success.
 */
static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_table *table = &nl_table[sk->sk_protocol];
	s32 portid = task_tgid_vnr(current);
	int err;
	s32 rover = -4096;
	bool ok;

retry:
	cond_resched();
	rcu_read_lock();
	ok = !__netlink_lookup(table, portid, net);
	rcu_read_unlock();
	if (!ok) {
		/* Bind collision, search negative portid values. */
		if (rover == -4096)
			/* rover will be in range [S32_MIN, -4097] */
			rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
		else if (rover >= -4096)
			rover = -4097;
		portid = rover--;
		goto retry;
	}

	err = netlink_insert(sk, portid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine.  */
	if (err == -EBUSY)
		err = 0;

	return err;
}

/**
 * __netlink_ns_capable - General netlink message capability test
 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created and
 * the sender of the message has the capability @cap in the user
 * namespace @user_ns. The opener test is skipped when NETLINK_SKB_DST
 * is set in @nsp->flags.
876 */ 877 bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, 878 struct user_namespace *user_ns, int cap) 879 { 880 return ((nsp->flags & NETLINK_SKB_DST) || 881 file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && 882 ns_capable(user_ns, cap); 883 } 884 EXPORT_SYMBOL(__netlink_ns_capable); 885 886 /** 887 * netlink_ns_capable - General netlink message capability test 888 * @skb: socket buffer holding a netlink command from userspace 889 * @user_ns: The user namespace of the capability to use 890 * @cap: The capability to use 891 * 892 * Test to see if the opener of the socket we received the message 893 * from had when the netlink socket was created and the sender of the 894 * message has the capability @cap in the user namespace @user_ns. 895 */ 896 bool netlink_ns_capable(const struct sk_buff *skb, 897 struct user_namespace *user_ns, int cap) 898 { 899 return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); 900 } 901 EXPORT_SYMBOL(netlink_ns_capable); 902 903 /** 904 * netlink_capable - Netlink global message capability test 905 * @skb: socket buffer holding a netlink command from userspace 906 * @cap: The capability to use 907 * 908 * Test to see if the opener of the socket we received the message 909 * from had when the netlink socket was created and the sender of the 910 * message has the capability @cap in all user namespaces. 
*/
bool netlink_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had the capability @cap when the netlink socket was created and
 * the sender of the message has the capability @cap over the network
 * namespace of the socket we received the message from.
 */
bool netlink_net_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
}
EXPORT_SYMBOL(netlink_net_capable);

/* Return non-zero when the socket's protocol has @flag set in nl_table, or
 * when the caller has CAP_NET_ADMIN in the user namespace owning the
 * socket's network namespace.
 */
static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
		ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}

/* Store the new subscription count for @sk, adding the socket to or removing
 * it from the protocol's mc_list when the count changes between zero and
 * non-zero.
 */
static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

/* Grow @sk's group bitmap to the protocol's current group count.
 *
 * Runs with the table grabbed. The newly added tail of the bitmap is zeroed.
 * Returns 0 (also when the bitmap is already large enough), -ENOENT when the
 * protocol is not registered, or -ENOMEM.
 */
static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	/* GFP_ATOMIC: netlink_table_grab() holds a lock with IRQs disabled. */
	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
 out_unlock:
	netlink_table_ungrab();
	return err;
}

/* Call the protocol's unbind hook for every bit set in @groups below bit
 * index @group (bit i maps to group number i + 1). No-op when the socket has
 * no unbind hook.
 */
static void netlink_undo_bind(int group, long unsigned int groups,
			      struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int undo;

	if (!nlk->netlink_unbind)
		return;

	for (undo = 0; undo < group; undo++)
		if (test_bit(undo, &groups))
			nlk->netlink_unbind(sock_net(sk), undo + 1);
}

/* bind() handler.
 *
 * Validates the address, checks permission for multicast groups, runs the
 * protocol's bind hook for every requested group (undoing earlier ones on
 * failure), binds the port id if the socket is not bound yet (explicit
 * nl_pid or autobind) and finally updates subscriptions, the first word of
 * the group bitmap and the listener masks.
 *
 * Returns 0 or a negative errno (-EINVAL, -EPERM, or an error from the
 * helpers called).
 */
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err = 0;
	unsigned long groups;
	bool bound;

	if (addr_len < sizeof(struct sockaddr_nl))
		return -EINVAL;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;
	groups = nladdr->nl_groups;

	/* Only superuser is allowed to listen multicasts */
	if (groups) {
		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	/* Drop requested bits beyond the socket's group count. */
	if (nlk->ngroups < BITS_PER_LONG)
		groups &= (1UL << nlk->ngroups) - 1;

	/* Paired with WRITE_ONCE() in netlink_insert() */
	bound = READ_ONCE(nlk->bound);
	if (bound) {
		/* Ensure nlk->portid is up-to-date. */
		smp_rmb();

		if (nladdr->nl_pid != nlk->portid)
			return -EINVAL;
	}

	if (nlk->netlink_bind && groups) {
		int group;

		/* nl_groups is a u32, so cap the maximum groups we can bind */
		for (group = 0; group < BITS_PER_TYPE(u32); group++) {
			if (!test_bit(group, &groups))
				continue;
			err = nlk->netlink_bind(net, group + 1);
			if (!err)
				continue;
			netlink_undo_bind(group, groups, sk);
			return err;
		}
	}

	/* No need for barriers here as we return to user-space without
	 * using any of the bound attributes.
	 */
	netlink_lock_table();
	if (!bound) {
		err = nladdr->nl_pid ?
			netlink_insert(sk, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err) {
			netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
			goto unlock;
		}
	}

	/* Nothing requested and nothing subscribed in the first 32 groups. */
	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		goto unlock;
	netlink_unlock_table();

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;

unlock:
	netlink_unlock_table();
	return err;
}

/* connect() handler.
 *
 * AF_UNSPEC disconnects the socket and clears the destination. For
 * AF_NETLINK a non-zero destination (pid or groups) requires
 * NL_CFG_F_NONROOT_SEND permission; an unbound socket is autobound first.
 * The destination group is the lowest set bit of nl_groups (ffs()).
 *
 * Returns 0 or a negative errno.
 */
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (alen < sizeof(addr->sa_family))
		return -EINVAL;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state	= NETLINK_UNCONNECTED;
		nlk->dst_portid	= 0;
		nlk->dst_group  = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	if (alen < sizeof(struct sockaddr_nl))
		return -EINVAL;

	if ((nladdr->nl_groups || nladdr->nl_pid) &&
	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
		return -EPERM;

	/* No need for barriers here as we return to user-space without
	 * using any of the bound attributes.
	 * Paired with WRITE_ONCE() in netlink_insert().
	 */
	if (!READ_ONCE(nlk->bound))
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state	= NETLINK_CONNECTED;
		nlk->dst_portid = nladdr->nl_pid;
		nlk->dst_group  = ffs(nladdr->nl_groups);
	}

	return err;
}

/* getsockname()/getpeername() handler.
 *
 * Fills @addr with the peer's port id and group mask when @peer is set,
 * otherwise with the socket's own port id and the first word of its group
 * bitmap (0 when it has none). Returns the size of struct sockaddr_nl.
 */
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;

	if (peer) {
		nladdr->nl_pid = nlk->dst_portid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->portid;
		netlink_lock_table();
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
		netlink_unlock_table();
	}
	return sizeof(*nladdr);
}

/* ioctl handler: no command is handled here; always returns -ENOIOCTLCMD. */
static int netlink_ioctl(struct socket *sock, unsigned int cmd,
			 unsigned long arg)
{
	/* try to hand this ioctl down to the NIC drivers.
	 */
	return -ENOIOCTLCMD;
}

/* Find the destination socket for a unicast from @ssk to @portid.
 *
 * Returns the socket with a reference held, or ERR_PTR(-ECONNREFUSED) when
 * no socket is bound to @portid or the destination is connected to a peer
 * other than the sender.
 */
static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* A connected destination only accepts messages from its peer. */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_portid != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

/* Return the netlink socket behind @filp with a reference held.
 *
 * Returns ERR_PTR(-ENOTSOCK) when the file is not a socket and
 * ERR_PTR(-EINVAL) when the socket is not an AF_NETLINK socket.
 */
struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

/* Allocate an skb able to hold @size bytes.
 *
 * Requests up to NLMSG_GOODSIZE, and all broadcast requests, use
 * alloc_skb(). Larger unicast requests get a vmalloc'ed data area wrapped
 * with __build_skb() and the netlink destructor, which knows how to vfree()
 * it. Returns NULL on allocation failure.
 */
static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
					       int broadcast)
{
	struct sk_buff *skb;
	void *data;

	if (size <= NLMSG_GOODSIZE || broadcast)
		return alloc_skb(size, GFP_KERNEL);

	/* Room for the payload plus the trailing skb_shared_info. */
	size = SKB_DATA_ALIGN(size) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	data = vmalloc(size);
	if (data == NULL)
		return NULL;

	skb = __build_skb(data, size);
	if (skb == NULL)
		vfree(data);
	else
		skb->destructor = netlink_skb_destructor;

	return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination, just
 * all error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
* 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
		      long *timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	/* Receiver is over its receive buffer limit or marked congested. */
	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
		DECLARE_WAITQUEUE(wait, current);
		if (!*timeo) {
			/* No time left to wait: give up. The overrun is only
			 * recorded when there is no sender socket or the
			 * sender is a kernel socket.
			 */
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		/* Re-check the condition after queueing on the wait queue and
		 * sleep only if it still holds and the socket is not dead.
		 */
		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			*timeo = schedule_timeout(*timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		/* The reference is dropped here; the caller looks up again. */
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(*timeo);
		}
		return 1;
	}
	netlink_skb_set_owner_r(skb, sk);
	return 0;
}

/*
 * Hand @skb to netlink_deliver_tap(), append it to @sk's receive queue and
 * invoke the socket's sk_data_ready callback.
 *
 * Returns the skb length, sampled before the skb is queued.
 */
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;

	netlink_deliver_tap(sock_net(sk), skb);

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk);
	return len;
}

/*
 * Queue @skb on @sk via __netlink_sendskb() and drop one reference on @sk.
 * Returns the length of the queued skb.
 */
int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = __netlink_sendskb(sk, skb);

	sock_put(sk);
	return len;
}

/* Free @skb and drop one reference on @sk. */
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}

/*
 * Try to shrink the head of @skb by its unused tail room.
 *
 * Nothing is done when the head is vmalloc()ed or when the unused tail room
 * is less than half of skb->truesize. A shared skb is replaced by a clone
 * first (the original is returned unchanged if cloning fails). The result of
 * pskb_expand_head() is not checked, so the shrink is best effort.
 *
 * Returns the skb to use from now on, which may differ from @skb.
 */
static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
	int delta;

	WARN_ON(skb->sk != NULL);
	delta = skb->end - skb->tail;
	if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
		return skb;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		consume_skb(skb);
		skb = nskb;
	}

	/* Negative tail delta; direct reclaim, warnings and retries are
	 * masked out of the allocation flags.
	 */
	pskb_expand_head(skb, 0, -delta,
			 (allocation & ~__GFP_DIRECT_RECLAIM) |
			 __GFP_NOWARN | __GFP_NORETRY);
	return skb;
}

/*
 * Deliver @skb to the kernel socket @sk on behalf of sender @ssk.
 *
 * If @sk has a netlink_rcv callback, the skb is charged to @sk, tagged with
 * the sending socket, passed to the tap hook and to the callback, and then
 * consumed; the skb length is returned. Without a callback the skb is freed
 * and -ECONNREFUSED is returned. One reference on @sk is dropped either way.
 */
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
				  struct sock *ssk)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		netlink_skb_set_owner_r(skb, sk);
		NETLINK_CB(skb).sk = ssk;
		netlink_deliver_tap_kernel(sk, ssk, skb);
		nlk->netlink_rcv(skb);
		consume_skb(skb);
	} else {
		kfree_skb(skb);
	}
	sock_put(sk);
	return ret;
}

/*
 * Send @skb from @ssk to the socket bound to @portid.
 *
 * The skb is trimmed first. Kernel destinations are handled by
 * netlink_unicast_kernel(). For other destinations the socket filter runs
 * first; a filtered skb is freed but its length is still returned. The
 * lookup is repeated for as long as netlink_attachskb() returns 1.
 *
 * Returns the skb length or a negative errno. The skb is consumed or freed
 * on every path.
 */
int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 portid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbyportid(ssk, portid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb, ssk);

	if (sk_filter(sk, skb)) {
		/* Dropped by the filter: report the length anyway. */
		err = skb->len;
		kfree_skb(skb);
		sock_put(sk);
		return err;
	}

	err = netlink_attachskb(sk, skb, &timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

/*
 * Test whether bit @group - 1 is set in the listeners mask of the protocol
 * that the kernel socket @sk belongs to.
 *
 * BUGs if @sk is not a kernel socket. Returns 0 when the protocol has no
 * listeners mask or @group - 1 is not below the protocol's group count.
 * The mask is read under rcu_read_lock().
 */
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	struct listeners *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners->masks);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

/*
 * Report whether the socket recorded in NETLINK_CB(@skb).sk has the
 * NETLINK_F_STRICT_CHK flag set.
 */
bool netlink_strict_get_check(struct sk_buff *skb)
{
	const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);

	return nlk->flags & NETLINK_F_STRICT_CHK;
}
EXPORT_SYMBOL_GPL(netlink_strict_get_check);

/*
 * Queue @skb on @sk if the socket is within its receive buffer limit and
 * not marked congested.
 *
 * Returns -1 if the skb was not queued. Otherwise returns 1 when, after
 * queueing, more than half of the receive buffer is in use, and 0 when not.
 */
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
		netlink_skb_set_owner_r(skb, sk);
		__netlink_sendskb(sk, skb);
		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
	}
	return -1;
}

/* State shared by all do_one_broadcast() calls of one netlink_broadcast(). */
struct netlink_broadcast_data {
	struct sock *exclude_sk;	/* socket that is never delivered to */
	struct net *net;		/* namespace of the sending socket */
	u32 portid;			/* sockets with this portid are skipped */
	u32 group;			/* destination group, 1-based */
	int failure;			/* set once obtaining a copy failed */
	int delivery_failure;		/* failure on a BROADCAST_SEND_ERROR socket */
	int congested;			/* OR of netlink_broadcast_deliver() results */
	int delivered;			/* set once any socket got the message */
	gfp_t allocation;		/* flags used for skb_clone() */
	struct sk_buff *skb, *skb2;	/* original, and copy for the next receiver */
};

/*
 * Deliver the broadcast described by @p to the single socket @sk, if @sk
 * qualifies as a receiver.
 */
static void do_one_broadcast(struct sock *sk,
				    struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		return;

	/* Skip the excluded portid and sockets not subscribed to the group. */
	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		return;

	/* A socket in another namespace needs NETLINK_F_LISTEN_ALL_NSID, an
	 * id for the sender's namespace and CAP_NET_BROADCAST there.
	 */
	if (!net_eq(sock_net(sk), p->net)) {
		if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
			return;

		if (!peernet_has_id(sock_net(sk), p->net))
			return;

		if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
				     CAP_NET_BROADCAST))
			return;
	}

	/* After an earlier failure the remaining receivers only get an
	 * overrun recorded.
	 */
	if (p->failure) {
		netlink_overrun(sk);
		return;
	}

	sock_hold(sk);
	/* Obtain the copy for this receiver unless one is left over. */
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
		goto out;
	}
	if (sk_filter(sk, p->skb2)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
		goto out;
	}
	NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
	if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
		NETLINK_CB(p->skb2).nsid_is_set = true;
	val = netlink_broadcast_deliver(sk, p->skb2);
	if (val < 0) {
		/* Not queued: skb2 stays set and is reused or freed later. */
		netlink_overrun(sk);
		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
out:
	sock_put(sk);
}

/*
 * Send @skb to every socket on the protocol's multicast list that is
 * subscribed to @group, except @ssk itself and sockets bound to @portid.
 *
 * The caller's reference on @skb is consumed.
 *
 * Returns -ENOBUFS if delivery failed for a socket that has
 * NETLINK_F_BROADCAST_SEND_ERROR set, 0 if at least one socket received the
 * message, and -ESRCH otherwise.
 */
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
		      u32 group, gfp_t allocation)
{
	struct net *net = sock_net(ssk);
	struct netlink_broadcast_data info;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.portid = portid;
	info.group = group;
	info.failure = 0;
	info.delivery_failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	consume_skb(skb);

	netlink_unlock_table();

	/* info.skb2 is a copy that was never queued, if there is one. */
	if (info.delivery_failure) {
		kfree_skb(info.skb2);
		return -ENOBUFS;
	}
	consume_skb(info.skb2);

	if (info.delivered) {
		/* Yield when a receiver is filling up and blocking is allowed. */
		if (info.congested && gfpflags_allow_blocking(allocation))
			yield();
		return 0;
	}
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast);

/* Arguments shared by all do_one_set_err() calls of one netlink_set_err(). */
struct netlink_set_err_data {
	struct sock *exclude_sk;	/* socket that is never touched */
	u32 portid;			/* sockets with this portid are skipped */
	u32 group;			/* group whose members get the error, 1-based */
	int code;			/* positive errno stored in sk_err */
};

/*
 * Set the error from @p on @sk if @sk is in the same namespace as the
 * excluded socket, is not bound to the skipped portid and is a member of
 * the group.
 *
 * Returns 1 when the error is ENOBUFS and the socket has
 * NETLINK_F_RECV_NO_ENOBUFS set (the error is then not stored), else 0.
 */
static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int ret = 0;

	if (sk == p->exclude_sk)
		goto out;

	if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
		goto out;

	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
		ret = 1;
		goto out;
	}

	sk->sk_err = p->code;
	sk_error_report(sk);
out:
	return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_NO_ENOBUFS socket option.
1590 */ 1591 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 1592 { 1593 struct netlink_set_err_data info; 1594 struct sock *sk; 1595 int ret = 0; 1596 1597 info.exclude_sk = ssk; 1598 info.portid = portid; 1599 info.group = group; 1600 /* sk->sk_err wants a positive error value */ 1601 info.code = -code; 1602 1603 read_lock(&nl_table_lock); 1604 1605 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1606 ret += do_one_set_err(sk, &info); 1607 1608 read_unlock(&nl_table_lock); 1609 return ret; 1610 } 1611 EXPORT_SYMBOL(netlink_set_err); 1612 1613 /* must be called with netlink table grabbed */ 1614 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1615 unsigned int group, 1616 int is_new) 1617 { 1618 int old, new = !!is_new, subscriptions; 1619 1620 old = test_bit(group - 1, nlk->groups); 1621 subscriptions = nlk->subscriptions - old + new; 1622 if (new) 1623 __set_bit(group - 1, nlk->groups); 1624 else 1625 __clear_bit(group - 1, nlk->groups); 1626 netlink_update_subscriptions(&nlk->sk, subscriptions); 1627 netlink_update_listeners(&nlk->sk); 1628 } 1629 1630 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1631 sockptr_t optval, unsigned int optlen) 1632 { 1633 struct sock *sk = sock->sk; 1634 struct netlink_sock *nlk = nlk_sk(sk); 1635 unsigned int val = 0; 1636 int err; 1637 1638 if (level != SOL_NETLINK) 1639 return -ENOPROTOOPT; 1640 1641 if (optlen >= sizeof(int) && 1642 copy_from_sockptr(&val, optval, sizeof(val))) 1643 return -EFAULT; 1644 1645 switch (optname) { 1646 case NETLINK_PKTINFO: 1647 if (val) 1648 nlk->flags |= NETLINK_F_RECV_PKTINFO; 1649 else 1650 nlk->flags &= ~NETLINK_F_RECV_PKTINFO; 1651 err = 0; 1652 break; 1653 case NETLINK_ADD_MEMBERSHIP: 1654 case NETLINK_DROP_MEMBERSHIP: { 1655 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 1656 return -EPERM; 1657 err = netlink_realloc_groups(sk); 1658 if (err) 1659 return err; 1660 if (!val || val - 1 >= nlk->ngroups) 1661 
return -EINVAL; 1662 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { 1663 err = nlk->netlink_bind(sock_net(sk), val); 1664 if (err) 1665 return err; 1666 } 1667 netlink_table_grab(); 1668 netlink_update_socket_mc(nlk, val, 1669 optname == NETLINK_ADD_MEMBERSHIP); 1670 netlink_table_ungrab(); 1671 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) 1672 nlk->netlink_unbind(sock_net(sk), val); 1673 1674 err = 0; 1675 break; 1676 } 1677 case NETLINK_BROADCAST_ERROR: 1678 if (val) 1679 nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR; 1680 else 1681 nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR; 1682 err = 0; 1683 break; 1684 case NETLINK_NO_ENOBUFS: 1685 if (val) { 1686 nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS; 1687 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 1688 wake_up_interruptible(&nlk->wait); 1689 } else { 1690 nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS; 1691 } 1692 err = 0; 1693 break; 1694 case NETLINK_LISTEN_ALL_NSID: 1695 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) 1696 return -EPERM; 1697 1698 if (val) 1699 nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; 1700 else 1701 nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; 1702 err = 0; 1703 break; 1704 case NETLINK_CAP_ACK: 1705 if (val) 1706 nlk->flags |= NETLINK_F_CAP_ACK; 1707 else 1708 nlk->flags &= ~NETLINK_F_CAP_ACK; 1709 err = 0; 1710 break; 1711 case NETLINK_EXT_ACK: 1712 if (val) 1713 nlk->flags |= NETLINK_F_EXT_ACK; 1714 else 1715 nlk->flags &= ~NETLINK_F_EXT_ACK; 1716 err = 0; 1717 break; 1718 case NETLINK_GET_STRICT_CHK: 1719 if (val) 1720 nlk->flags |= NETLINK_F_STRICT_CHK; 1721 else 1722 nlk->flags &= ~NETLINK_F_STRICT_CHK; 1723 err = 0; 1724 break; 1725 default: 1726 err = -ENOPROTOOPT; 1727 } 1728 return err; 1729 } 1730 1731 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1732 char __user *optval, int __user *optlen) 1733 { 1734 struct sock *sk = sock->sk; 1735 struct netlink_sock *nlk = nlk_sk(sk); 1736 int len, val, err; 1737 1738 if (level != 
SOL_NETLINK) 1739 return -ENOPROTOOPT; 1740 1741 if (get_user(len, optlen)) 1742 return -EFAULT; 1743 if (len < 0) 1744 return -EINVAL; 1745 1746 switch (optname) { 1747 case NETLINK_PKTINFO: 1748 if (len < sizeof(int)) 1749 return -EINVAL; 1750 len = sizeof(int); 1751 val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0; 1752 if (put_user(len, optlen) || 1753 put_user(val, optval)) 1754 return -EFAULT; 1755 err = 0; 1756 break; 1757 case NETLINK_BROADCAST_ERROR: 1758 if (len < sizeof(int)) 1759 return -EINVAL; 1760 len = sizeof(int); 1761 val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0; 1762 if (put_user(len, optlen) || 1763 put_user(val, optval)) 1764 return -EFAULT; 1765 err = 0; 1766 break; 1767 case NETLINK_NO_ENOBUFS: 1768 if (len < sizeof(int)) 1769 return -EINVAL; 1770 len = sizeof(int); 1771 val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0; 1772 if (put_user(len, optlen) || 1773 put_user(val, optval)) 1774 return -EFAULT; 1775 err = 0; 1776 break; 1777 case NETLINK_LIST_MEMBERSHIPS: { 1778 int pos, idx, shift; 1779 1780 err = 0; 1781 netlink_lock_table(); 1782 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 1783 if (len - pos < sizeof(u32)) 1784 break; 1785 1786 idx = pos / sizeof(unsigned long); 1787 shift = (pos % sizeof(unsigned long)) * 8; 1788 if (put_user((u32)(nlk->groups[idx] >> shift), 1789 (u32 __user *)(optval + pos))) { 1790 err = -EFAULT; 1791 break; 1792 } 1793 } 1794 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) 1795 err = -EFAULT; 1796 netlink_unlock_table(); 1797 break; 1798 } 1799 case NETLINK_CAP_ACK: 1800 if (len < sizeof(int)) 1801 return -EINVAL; 1802 len = sizeof(int); 1803 val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0; 1804 if (put_user(len, optlen) || 1805 put_user(val, optval)) 1806 return -EFAULT; 1807 err = 0; 1808 break; 1809 case NETLINK_EXT_ACK: 1810 if (len < sizeof(int)) 1811 return -EINVAL; 1812 len = sizeof(int); 1813 val = nlk->flags & NETLINK_F_EXT_ACK ? 
1 : 0; 1814 if (put_user(len, optlen) || put_user(val, optval)) 1815 return -EFAULT; 1816 err = 0; 1817 break; 1818 case NETLINK_GET_STRICT_CHK: 1819 if (len < sizeof(int)) 1820 return -EINVAL; 1821 len = sizeof(int); 1822 val = nlk->flags & NETLINK_F_STRICT_CHK ? 1 : 0; 1823 if (put_user(len, optlen) || put_user(val, optval)) 1824 return -EFAULT; 1825 err = 0; 1826 break; 1827 default: 1828 err = -ENOPROTOOPT; 1829 } 1830 return err; 1831 } 1832 1833 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1834 { 1835 struct nl_pktinfo info; 1836 1837 info.group = NETLINK_CB(skb).dst_group; 1838 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1839 } 1840 1841 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, 1842 struct sk_buff *skb) 1843 { 1844 if (!NETLINK_CB(skb).nsid_is_set) 1845 return; 1846 1847 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), 1848 &NETLINK_CB(skb).nsid); 1849 } 1850 1851 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1852 { 1853 struct sock *sk = sock->sk; 1854 struct netlink_sock *nlk = nlk_sk(sk); 1855 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1856 u32 dst_portid; 1857 u32 dst_group; 1858 struct sk_buff *skb; 1859 int err; 1860 struct scm_cookie scm; 1861 u32 netlink_skb_flags = 0; 1862 1863 if (msg->msg_flags & MSG_OOB) 1864 return -EOPNOTSUPP; 1865 1866 if (len == 0) { 1867 pr_warn_once("Zero length message leads to an empty skb\n"); 1868 return -ENODATA; 1869 } 1870 1871 err = scm_send(sock, msg, &scm, true); 1872 if (err < 0) 1873 return err; 1874 1875 if (msg->msg_namelen) { 1876 err = -EINVAL; 1877 if (msg->msg_namelen < sizeof(struct sockaddr_nl)) 1878 goto out; 1879 if (addr->nl_family != AF_NETLINK) 1880 goto out; 1881 dst_portid = addr->nl_pid; 1882 dst_group = ffs(addr->nl_groups); 1883 err = -EPERM; 1884 if ((dst_group || dst_portid) && 1885 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1886 
goto out; 1887 netlink_skb_flags |= NETLINK_SKB_DST; 1888 } else { 1889 dst_portid = nlk->dst_portid; 1890 dst_group = nlk->dst_group; 1891 } 1892 1893 /* Paired with WRITE_ONCE() in netlink_insert() */ 1894 if (!READ_ONCE(nlk->bound)) { 1895 err = netlink_autobind(sock); 1896 if (err) 1897 goto out; 1898 } else { 1899 /* Ensure nlk is hashed and visible. */ 1900 smp_rmb(); 1901 } 1902 1903 err = -EMSGSIZE; 1904 if (len > sk->sk_sndbuf - 32) 1905 goto out; 1906 err = -ENOBUFS; 1907 skb = netlink_alloc_large_skb(len, dst_group); 1908 if (skb == NULL) 1909 goto out; 1910 1911 NETLINK_CB(skb).portid = nlk->portid; 1912 NETLINK_CB(skb).dst_group = dst_group; 1913 NETLINK_CB(skb).creds = scm.creds; 1914 NETLINK_CB(skb).flags = netlink_skb_flags; 1915 1916 err = -EFAULT; 1917 if (memcpy_from_msg(skb_put(skb, len), msg, len)) { 1918 kfree_skb(skb); 1919 goto out; 1920 } 1921 1922 err = security_netlink_send(sk, skb); 1923 if (err) { 1924 kfree_skb(skb); 1925 goto out; 1926 } 1927 1928 if (dst_group) { 1929 refcount_inc(&skb->users); 1930 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 1931 } 1932 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT); 1933 1934 out: 1935 scm_destroy(&scm); 1936 return err; 1937 } 1938 1939 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1940 int flags) 1941 { 1942 struct scm_cookie scm; 1943 struct sock *sk = sock->sk; 1944 struct netlink_sock *nlk = nlk_sk(sk); 1945 size_t copied; 1946 struct sk_buff *skb, *data_skb; 1947 int err, ret; 1948 1949 if (flags & MSG_OOB) 1950 return -EOPNOTSUPP; 1951 1952 copied = 0; 1953 1954 skb = skb_recv_datagram(sk, flags, &err); 1955 if (skb == NULL) 1956 goto out; 1957 1958 data_skb = skb; 1959 1960 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1961 if (unlikely(skb_shinfo(skb)->frag_list)) { 1962 /* 1963 * If this skb has a frag_list, then here that means that we 1964 * will have to use the frag_list skb's data for compat tasks 1965 * and the 
regular skb's data for normal (non-compat) tasks. 1966 * 1967 * If we need to send the compat skb, assign it to the 1968 * 'data_skb' variable so that it will be used below for data 1969 * copying. We keep 'skb' for everything else, including 1970 * freeing both later. 1971 */ 1972 if (flags & MSG_CMSG_COMPAT) 1973 data_skb = skb_shinfo(skb)->frag_list; 1974 } 1975 #endif 1976 1977 /* Record the max length of recvmsg() calls for future allocations */ 1978 nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); 1979 nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, 1980 SKB_WITH_OVERHEAD(32768)); 1981 1982 copied = data_skb->len; 1983 if (len < copied) { 1984 msg->msg_flags |= MSG_TRUNC; 1985 copied = len; 1986 } 1987 1988 err = skb_copy_datagram_msg(data_skb, 0, msg, copied); 1989 1990 if (msg->msg_name) { 1991 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1992 addr->nl_family = AF_NETLINK; 1993 addr->nl_pad = 0; 1994 addr->nl_pid = NETLINK_CB(skb).portid; 1995 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1996 msg->msg_namelen = sizeof(*addr); 1997 } 1998 1999 if (nlk->flags & NETLINK_F_RECV_PKTINFO) 2000 netlink_cmsg_recv_pktinfo(msg, skb); 2001 if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) 2002 netlink_cmsg_listen_all_nsid(sk, msg, skb); 2003 2004 memset(&scm, 0, sizeof(scm)); 2005 scm.creds = *NETLINK_CREDS(skb); 2006 if (flags & MSG_TRUNC) 2007 copied = data_skb->len; 2008 2009 skb_free_datagram(sk, skb); 2010 2011 if (nlk->cb_running && 2012 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 2013 ret = netlink_dump(sk); 2014 if (ret) { 2015 sk->sk_err = -ret; 2016 sk_error_report(sk); 2017 } 2018 } 2019 2020 scm_recv(sock, msg, &scm, flags); 2021 out: 2022 netlink_rcv_wake(sk); 2023 return err ? : copied; 2024 } 2025 2026 static void netlink_data_ready(struct sock *sk) 2027 { 2028 BUG(); 2029 } 2030 2031 /* 2032 * We export these functions to other modules. 
They provide a
 * complete set of kernel non-blocking support for message
 * queueing.
 */

/*
 * Create the kernel-side socket for netlink protocol @unit in @net.
 *
 * @cfg may be NULL. When present it supplies the input callback, the
 * cb_mutex, the number of groups (raised to at least 32) and the
 * bind/unbind/compare callbacks and flags. The first socket created for a
 * protocol records these in nl_table[@unit] together with @module; later
 * ones only increment the protocol's registered count.
 *
 * Returns the new socket, or NULL if @unit is out of range or any step of
 * the setup fails.
 */
struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
			struct netlink_kernel_cfg *cfg)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	struct listeners *listeners = NULL;
	struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
	unsigned int groups;

	BUG_ON(!nl_table);

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
		goto out_sock_release_nosk;

	sk = sock->sk;

	if (!cfg || cfg->groups < 32)
		groups = 32;
	else
		groups = cfg->groups;

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk->sk_data_ready = netlink_data_ready;
	if (cfg && cfg->input)
		nlk_sk(sk)->netlink_rcv = cfg->input;

	/* Kernel sockets are inserted with portid 0. */
	if (netlink_insert(sk, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_F_KERNEL_SOCKET;

	netlink_table_grab();
	if (!nl_table[unit].registered) {
		nl_table[unit].groups = groups;
		rcu_assign_pointer(nl_table[unit].listeners, listeners);
		nl_table[unit].cb_mutex = cb_mutex;
		nl_table[unit].module = module;
		if (cfg) {
			nl_table[unit].bind = cfg->bind;
			nl_table[unit].unbind = cfg->unbind;
			nl_table[unit].flags = cfg->flags;
			if (cfg->compare)
				nl_table[unit].compare = cfg->compare;
		}
		nl_table[unit].registered = 1;
	} else {
		/* Protocol already set up: the new mask is not needed. */
		kfree(listeners);
		nl_table[unit].registered++;
	}
	netlink_table_ungrab();
	return sk;

out_sock_release:
	/* listeners is still NULL here if its allocation failed. */
	kfree(listeners);
	netlink_kernel_release(sk);
	return NULL;

out_sock_release_nosk:
	sock_release(sock);
	return NULL;
}
EXPORT_SYMBOL(__netlink_kernel_create);

/*
 * Release the struct socket attached to @sk. Does nothing when @sk is NULL
 * or has no sk_socket.
 */
void
netlink_kernel_release(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);

/*
 * Set the number of groups of @sk's protocol to @groups (at least 32).
 *
 * When the new count needs a larger listeners mask, a new one is allocated
 * with GFP_ATOMIC, the old contents are copied over, the new mask is
 * published with rcu_assign_pointer() and the old one is freed with
 * kfree_rcu().
 *
 * Returns 0, or -ENOMEM if the new mask cannot be allocated.
 * NOTE(review): netlink_change_ngroups() calls this with the table grabbed;
 * presumably every caller must do so - confirm.
 */
int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	struct listeners *new, *old;
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];

	if (groups < 32)
		groups = 32;

	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
		new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
		if (!new)
			return -ENOMEM;
		old = nl_deref_protected(tbl->listeners);
		memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
		rcu_assign_pointer(tbl->listeners, new);

		kfree_rcu(old, rcu);
	}
	tbl->groups = groups;

	return 0;
}

/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
2156 */ 2157 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 2158 { 2159 int err; 2160 2161 netlink_table_grab(); 2162 err = __netlink_change_ngroups(sk, groups); 2163 netlink_table_ungrab(); 2164 2165 return err; 2166 } 2167 2168 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 2169 { 2170 struct sock *sk; 2171 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 2172 2173 sk_for_each_bound(sk, &tbl->mc_list) 2174 netlink_update_socket_mc(nlk_sk(sk), group, 0); 2175 } 2176 2177 struct nlmsghdr * 2178 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) 2179 { 2180 struct nlmsghdr *nlh; 2181 int size = nlmsg_msg_size(len); 2182 2183 nlh = skb_put(skb, NLMSG_ALIGN(size)); 2184 nlh->nlmsg_type = type; 2185 nlh->nlmsg_len = size; 2186 nlh->nlmsg_flags = flags; 2187 nlh->nlmsg_pid = portid; 2188 nlh->nlmsg_seq = seq; 2189 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) 2190 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); 2191 return nlh; 2192 } 2193 EXPORT_SYMBOL(__nlmsg_put); 2194 2195 /* 2196 * It looks a bit ugly. 2197 * It would be better to create kernel thread. 
*/

/*
 * Append the NLMSG_DONE message that terminates a dump to @skb.
 *
 * The payload is nlk->dump_done_errno. When the dump callback left an
 * extack message and the socket has NETLINK_F_EXT_ACK set, the message
 * string is appended as an NLMSGERR_ATTR_MSG attribute; failure to add the
 * attribute is not treated as an error.
 *
 * Returns 0, or -ENOBUFS (with a warning) if the header does not fit.
 */
static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct netlink_ext_ack *extack)
{
	struct nlmsghdr *nlh;

	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
			       NLM_F_MULTI | cb->answer_flags);
	if (WARN_ON(!nlh))
		return -ENOBUFS;

	nl_dump_check_consistent(cb, nlh);
	memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));

	if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) {
		nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
		if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
			nlmsg_end(skb, nlh);
	}

	return 0;
}

/*
 * Produce the next chunk of the dump running on @sk and queue it on the
 * socket.
 *
 * Runs under nlk->cb_mutex. While the dump callback keeps returning a
 * positive value, or there is no room left for the NLMSG_DONE message, the
 * skb is queued and the dump stays active. Otherwise NLMSG_DONE is appended,
 * the done callback runs, and the module reference and request skb saved by
 * __netlink_dump_start() are released.
 *
 * Returns 0 on success, -EINVAL if no dump is running, or -ENOBUFS if the
 * receive buffer is full, the skb cannot be allocated or NLMSG_DONE does not
 * fit.
 */
static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_ext_ack extack = {};
	struct netlink_callback *cb;
	struct sk_buff *skb = NULL;
	struct module *module;
	int err = -ENOBUFS;
	int alloc_min_size;
	int alloc_size;

	mutex_lock(nlk->cb_mutex);
	if (!nlk->cb_running) {
		err = -EINVAL;
		goto errout_skb;
	}

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto errout_skb;

	/* NLMSG_GOODSIZE is small to avoid high order allocations being
	 * required, but it makes sense to _attempt_ a 16K bytes allocation
	 * to reduce number of system calls on dump operations, if user
	 * ever provided a big enough buffer.
	 */
	cb = &nlk->cb;
	alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

	/* Opportunistic large allocation: may fail quietly. */
	if (alloc_min_size < nlk->max_recvmsg_len) {
		alloc_size = nlk->max_recvmsg_len;
		skb = alloc_skb(alloc_size,
				(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
				__GFP_NOWARN | __GFP_NORETRY);
	}
	/* Fall back to the minimum size with a regular allocation. */
	if (!skb) {
		alloc_size = alloc_min_size;
		skb = alloc_skb(alloc_size, GFP_KERNEL);
	}
	if (!skb)
		goto errout_skb;

	/* Trim skb to allocated size. User is expected to provide buffer as
	 * large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped at
	 * netlink_recvmsg())). dump will pack as many smaller messages as
	 * could fit within the allocated skb. skb is typically allocated
	 * with larger space than required (could be as much as near 2x the
	 * requested size with align to next power of 2 approach). Allowing
	 * dump to use the excess space makes it difficult for a user to have a
	 * reasonable static buffer based on the expected largest dump of a
	 * single netdev. The outcome is MSG_TRUNC error.
	 */
	skb_reserve(skb, skb_tailroom(skb) - alloc_size);

	/* Make sure malicious BPF programs can not read uninitialized memory
	 * from skb->head -> skb->data
	 */
	skb_reset_network_header(skb);
	skb_reset_mac_header(skb);

	netlink_skb_set_owner_r(skb, sk);

	/* The callback is only invoked while its last result was positive. */
	if (nlk->dump_done_errno > 0) {
		cb->extack = &extack;
		nlk->dump_done_errno = cb->dump(skb, cb);
		cb->extack = NULL;
	}

	/* More to dump, or no room for NLMSG_DONE: send what we have and
	 * keep the dump running.
	 */
	if (nlk->dump_done_errno > 0 ||
	    skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
		mutex_unlock(nlk->cb_mutex);

		if (sk_filter(sk, skb))
			kfree_skb(skb);
		else
			__netlink_sendskb(sk, skb);
		return 0;
	}

	if (netlink_dump_done(nlk, skb, cb, &extack))
		goto errout_skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
	/* frag_list skb's data is used for compat tasks
	 * and the regular skb's data for normal (non-compat) tasks.
	 * See netlink_recvmsg().
	 */
	if (unlikely(skb_shinfo(skb)->frag_list)) {
		if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack))
			goto errout_skb;
	}
#endif

	if (sk_filter(sk, skb))
		kfree_skb(skb);
	else
		__netlink_sendskb(sk, skb);

	if (cb->done)
		cb->done(cb);

	/* Dump finished: read what is needed from cb under the mutex and
	 * release it after unlocking.
	 */
	nlk->cb_running = false;
	module = cb->module;
	skb = cb->skb;
	mutex_unlock(nlk->cb_mutex);
	module_put(module);
	consume_skb(skb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
	return err;
}

/*
 * Start a dump described by @control in response to request @skb / @nlh.
 *
 * The dump runs on the socket bound to the portid recorded in
 * NETLINK_CB(@skb). An extra reference on @skb is taken and stored in the
 * callback state, and a reference on @control->module is held for the
 * duration of the dump.
 *
 * Returns -EINTR when the dump was started, the error from netlink_dump()
 * if the first chunk failed, -ECONNREFUSED if the requesting socket is gone,
 * -EBUSY if it already has a dump running, -EPROTONOSUPPORT if the module
 * reference cannot be taken, or the error from @control->start().
 */
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
			 const struct nlmsghdr *nlh,
			 struct netlink_dump_control *control)
{
	struct netlink_sock *nlk, *nlk2;
	struct netlink_callback *cb;
	struct sock *sk;
	int ret;

	refcount_inc(&skb->users);

	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
	if (sk == NULL) {
		ret = -ECONNREFUSED;
		goto error_free;
	}

	nlk = nlk_sk(sk);
	mutex_lock(nlk->cb_mutex);
	/* A dump is in progress... */
	if (nlk->cb_running) {
		ret = -EBUSY;
		goto error_unlock;
	}
	/* add reference of module which cb->dump belongs to */
	if (!try_module_get(control->module)) {
		ret = -EPROTONOSUPPORT;
		goto error_unlock;
	}

	cb = &nlk->cb;
	memset(cb, 0, sizeof(*cb));
	cb->dump = control->dump;
	cb->done = control->done;
	cb->nlh = nlh;
	cb->data = control->data;
	cb->module = control->module;
	cb->min_dump_alloc = control->min_dump_alloc;
	cb->skb = skb;

	/* Strict checking follows the socket recorded in the request skb. */
	nlk2 = nlk_sk(NETLINK_CB(skb).sk);
	cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);

	if (control->start) {
		ret = control->start(cb);
		if (ret)
			goto error_put;
	}

	nlk->cb_running = true;
	/* Positive value, so the first netlink_dump() calls cb->dump(). */
	nlk->dump_done_errno = INT_MAX;

	mutex_unlock(nlk->cb_mutex);

	ret = netlink_dump(sk);

	sock_put(sk);

	if (ret)
		return ret;

	/* We successfully started a dump, by returning -EINTR we
	 * signal not to send ACK even if it was requested.
	 */
	return -EINTR;

error_put:
	module_put(control->module);
error_unlock:
	sock_put(sk);
	mutex_unlock(nlk->cb_mutex);
error_free:
	kfree_skb(skb);
	return ret;
}
EXPORT_SYMBOL(__netlink_dump_start);

/*
 * Compute the space needed for the extended ACK attributes of an ACK sent
 * to @nlk.
 *
 * Returns 0 when there is no @extack or the socket has not set
 * NETLINK_F_EXT_ACK. The message and cookie are counted for every ACK; the
 * remaining attributes only when @err is non-zero.
 */
static size_t
netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
		    const struct netlink_ext_ack *extack)
{
	size_t tlvlen;

	if (!extack || !(nlk->flags & NETLINK_F_EXT_ACK))
		return 0;

	tlvlen = 0;
	if (extack->_msg)
		tlvlen += nla_total_size(strlen(extack->_msg) + 1);
	if (extack->cookie_len)
		tlvlen += nla_total_size(extack->cookie_len);

	/* Following attributes are only reported as error (not warning) */
	if (!err)
		return tlvlen;

	if (extack->bad_attr)
		tlvlen += nla_total_size(sizeof(u32));
	if (extack->policy)
		tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
	if (extack->miss_type)
		tlvlen += nla_total_size(sizeof(u32));
	if (extack->miss_nest)
		tlvlen += nla_total_size(sizeof(u32));

	return tlvlen;
}

/*
 * Write the extended ACK attributes from @extack into the reply @skb.
 *
 * Mirrors netlink_ack_tlv_len(): message and cookie always, the other
 * attributes only when @err is non-zero. Attribute offsets are relative to
 * @nlh and are only emitted when the pointer lies within @in_skb's data;
 * otherwise a warning is raised and the attribute is skipped.
 */
static void
netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
		     struct nlmsghdr *nlh, int err,
		     const struct netlink_ext_ack *extack)
{
	if (extack->_msg)
		WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
	if (extack->cookie_len)
		WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
				extack->cookie_len, extack->cookie));

	if (!err)
		return;

	if (extack->bad_attr &&
	    !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
		     (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
		WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
				    (u8 *)extack->bad_attr - (u8 *)nlh));
	if (extack->policy)
		netlink_policy_dump_write_attr(skb, extack->policy,
					       NLMSGERR_ATTR_POLICY);
	if (extack->miss_type)
		WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
				    extack->miss_type));
	if (extack->miss_nest &&
	    !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
		     (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
		WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
				    (u8 *)extack->miss_nest - (u8 *)nlh));
}

/*
 * Send an NLMSG_ERROR reply for request @nlh from @in_skb, carrying @err
 * (0 for a plain ACK) and, where enabled, the attributes from @extack.
 *
 * If the reply cannot be built, ENOBUFS is set as the socket error on the
 * socket recorded in NETLINK_CB(@in_skb).
 */
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
		 const struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	size_t payload = sizeof(*errmsg);
	struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
	unsigned int flags = 0;
	size_t tlvlen;

	/* Error messages get the original request appended, unless the user
	 * requests to cap the error message, and get extra error data if
	 * requested.
	 */
	if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
		payload += nlmsg_len(nlh);
	else
		flags |= NLM_F_CAPPED;

	tlvlen = netlink_ack_tlv_len(nlk, err, extack);
	if (tlvlen)
		flags |= NLM_F_ACK_TLVS;

	skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
	if (!skb)
		goto err_bad_put;

	rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			NLMSG_ERROR, sizeof(*errmsg), flags);
	if (!rep)
		goto err_bad_put;
	errmsg = nlmsg_data(rep);
	errmsg->error = err;
	errmsg->msg = *nlh;

	/* Uncapped replies also echo the request payload. */
	if (!(flags & NLM_F_CAPPED)) {
		if (!nlmsg_append(skb, nlmsg_len(nlh)))
			goto err_bad_put;

		memcpy(errmsg->msg.nlmsg_data, nlh->nlmsg_data,
		       nlmsg_len(nlh));
	}

	if (tlvlen)
		netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);

	nlmsg_end(skb, rep);

	nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid);

	return;

err_bad_put:
	NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
	sk_error_report(NETLINK_CB(in_skb).sk);
}
EXPORT_SYMBOL(netlink_ack);

/*
 * Walk the netlink messages in @skb and pass each request to @cb.
 *
 * Messages without NLM_F_REQUEST and control messages (type below
 * NLMSG_MIN_TYPE) are not passed to @cb. An ACK is sent when the message
 * asked for one (NLM_F_ACK) or @cb returned an error, except that a return
 * of -EINTR from @cb suppresses the ACK. Processing stops at the first
 * message whose length field is shorter than a header or longer than the
 * remaining data.
 *
 * Always returns 0.
 */
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *,
						   struct netlink_ext_ack *))
{
	struct netlink_ext_ack extack;
	struct nlmsghdr *nlh;
	int err;

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		memset(&extack, 0, sizeof(extack));
		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
			goto ack;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto ack;

		err = cb(skb, nlh, &extack);
		if (err == -EINTR)
			goto skip;

ack:
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err, &extack);

skip:
		/* Advance by the aligned length, clamped to what is left. */
		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @portid: destination netlink portid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_portid = 0;

		if (report) {
			refcount_inc(&skb->users);
			exclude_portid = portid;
		}

		/* errors reported via destination sk->sk_err, but propagate
		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
		err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
		if (err == -ESRCH)
			err = 0;
	}

	if (report) {
		int err2;

		err2 = nlmsg_unicast(sk, skb, portid);
		if (!err)
			err = err2;
	}
2618 return err; 2619 } 2620 EXPORT_SYMBOL(nlmsg_notify); 2621 2622 #ifdef CONFIG_PROC_FS 2623 struct nl_seq_iter { 2624 struct seq_net_private p; 2625 struct rhashtable_iter hti; 2626 int link; 2627 }; 2628 2629 static void netlink_walk_start(struct nl_seq_iter *iter) 2630 { 2631 rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti); 2632 rhashtable_walk_start(&iter->hti); 2633 } 2634 2635 static void netlink_walk_stop(struct nl_seq_iter *iter) 2636 { 2637 rhashtable_walk_stop(&iter->hti); 2638 rhashtable_walk_exit(&iter->hti); 2639 } 2640 2641 static void *__netlink_seq_next(struct seq_file *seq) 2642 { 2643 struct nl_seq_iter *iter = seq->private; 2644 struct netlink_sock *nlk; 2645 2646 do { 2647 for (;;) { 2648 nlk = rhashtable_walk_next(&iter->hti); 2649 2650 if (IS_ERR(nlk)) { 2651 if (PTR_ERR(nlk) == -EAGAIN) 2652 continue; 2653 2654 return nlk; 2655 } 2656 2657 if (nlk) 2658 break; 2659 2660 netlink_walk_stop(iter); 2661 if (++iter->link >= MAX_LINKS) 2662 return NULL; 2663 2664 netlink_walk_start(iter); 2665 } 2666 } while (sock_net(&nlk->sk) != seq_file_net(seq)); 2667 2668 return nlk; 2669 } 2670 2671 static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) 2672 __acquires(RCU) 2673 { 2674 struct nl_seq_iter *iter = seq->private; 2675 void *obj = SEQ_START_TOKEN; 2676 loff_t pos; 2677 2678 iter->link = 0; 2679 2680 netlink_walk_start(iter); 2681 2682 for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) 2683 obj = __netlink_seq_next(seq); 2684 2685 return obj; 2686 } 2687 2688 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2689 { 2690 ++*pos; 2691 return __netlink_seq_next(seq); 2692 } 2693 2694 static void netlink_native_seq_stop(struct seq_file *seq, void *v) 2695 { 2696 struct nl_seq_iter *iter = seq->private; 2697 2698 if (iter->link >= MAX_LINKS) 2699 return; 2700 2701 netlink_walk_stop(iter); 2702 } 2703 2704 2705 static int netlink_native_seq_show(struct seq_file *seq, void *v) 2706 { 2707 if (v == 
SEQ_START_TOKEN) { 2708 seq_puts(seq, 2709 "sk Eth Pid Groups " 2710 "Rmem Wmem Dump Locks Drops Inode\n"); 2711 } else { 2712 struct sock *s = v; 2713 struct netlink_sock *nlk = nlk_sk(s); 2714 2715 seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n", 2716 s, 2717 s->sk_protocol, 2718 nlk->portid, 2719 nlk->groups ? (u32)nlk->groups[0] : 0, 2720 sk_rmem_alloc_get(s), 2721 sk_wmem_alloc_get(s), 2722 nlk->cb_running, 2723 refcount_read(&s->sk_refcnt), 2724 atomic_read(&s->sk_drops), 2725 sock_i_ino(s) 2726 ); 2727 2728 } 2729 return 0; 2730 } 2731 2732 #ifdef CONFIG_BPF_SYSCALL 2733 struct bpf_iter__netlink { 2734 __bpf_md_ptr(struct bpf_iter_meta *, meta); 2735 __bpf_md_ptr(struct netlink_sock *, sk); 2736 }; 2737 2738 DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk) 2739 2740 static int netlink_prog_seq_show(struct bpf_prog *prog, 2741 struct bpf_iter_meta *meta, 2742 void *v) 2743 { 2744 struct bpf_iter__netlink ctx; 2745 2746 meta->seq_num--; /* skip SEQ_START_TOKEN */ 2747 ctx.meta = meta; 2748 ctx.sk = nlk_sk((struct sock *)v); 2749 return bpf_iter_run_prog(prog, &ctx); 2750 } 2751 2752 static int netlink_seq_show(struct seq_file *seq, void *v) 2753 { 2754 struct bpf_iter_meta meta; 2755 struct bpf_prog *prog; 2756 2757 meta.seq = seq; 2758 prog = bpf_iter_get_info(&meta, false); 2759 if (!prog) 2760 return netlink_native_seq_show(seq, v); 2761 2762 if (v != SEQ_START_TOKEN) 2763 return netlink_prog_seq_show(prog, &meta, v); 2764 2765 return 0; 2766 } 2767 2768 static void netlink_seq_stop(struct seq_file *seq, void *v) 2769 { 2770 struct bpf_iter_meta meta; 2771 struct bpf_prog *prog; 2772 2773 if (!v) { 2774 meta.seq = seq; 2775 prog = bpf_iter_get_info(&meta, true); 2776 if (prog) 2777 (void)netlink_prog_seq_show(prog, &meta, v); 2778 } 2779 2780 netlink_native_seq_stop(seq, v); 2781 } 2782 #else 2783 static int netlink_seq_show(struct seq_file *seq, void *v) 2784 { 2785 return 
netlink_native_seq_show(seq, v); 2786 } 2787 2788 static void netlink_seq_stop(struct seq_file *seq, void *v) 2789 { 2790 netlink_native_seq_stop(seq, v); 2791 } 2792 #endif 2793 2794 static const struct seq_operations netlink_seq_ops = { 2795 .start = netlink_seq_start, 2796 .next = netlink_seq_next, 2797 .stop = netlink_seq_stop, 2798 .show = netlink_seq_show, 2799 }; 2800 #endif 2801 2802 int netlink_register_notifier(struct notifier_block *nb) 2803 { 2804 return blocking_notifier_chain_register(&netlink_chain, nb); 2805 } 2806 EXPORT_SYMBOL(netlink_register_notifier); 2807 2808 int netlink_unregister_notifier(struct notifier_block *nb) 2809 { 2810 return blocking_notifier_chain_unregister(&netlink_chain, nb); 2811 } 2812 EXPORT_SYMBOL(netlink_unregister_notifier); 2813 2814 static const struct proto_ops netlink_ops = { 2815 .family = PF_NETLINK, 2816 .owner = THIS_MODULE, 2817 .release = netlink_release, 2818 .bind = netlink_bind, 2819 .connect = netlink_connect, 2820 .socketpair = sock_no_socketpair, 2821 .accept = sock_no_accept, 2822 .getname = netlink_getname, 2823 .poll = datagram_poll, 2824 .ioctl = netlink_ioctl, 2825 .listen = sock_no_listen, 2826 .shutdown = sock_no_shutdown, 2827 .setsockopt = netlink_setsockopt, 2828 .getsockopt = netlink_getsockopt, 2829 .sendmsg = netlink_sendmsg, 2830 .recvmsg = netlink_recvmsg, 2831 .mmap = sock_no_mmap, 2832 .sendpage = sock_no_sendpage, 2833 }; 2834 2835 static const struct net_proto_family netlink_family_ops = { 2836 .family = PF_NETLINK, 2837 .create = netlink_create, 2838 .owner = THIS_MODULE, /* for consistency 8) */ 2839 }; 2840 2841 static int __net_init netlink_net_init(struct net *net) 2842 { 2843 #ifdef CONFIG_PROC_FS 2844 if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops, 2845 sizeof(struct nl_seq_iter))) 2846 return -ENOMEM; 2847 #endif 2848 return 0; 2849 } 2850 2851 static void __net_exit netlink_net_exit(struct net *net) 2852 { 2853 #ifdef CONFIG_PROC_FS 2854 
remove_proc_entry("netlink", net->proc_net); 2855 #endif 2856 } 2857 2858 static void __init netlink_add_usersock_entry(void) 2859 { 2860 struct listeners *listeners; 2861 int groups = 32; 2862 2863 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2864 if (!listeners) 2865 panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); 2866 2867 netlink_table_grab(); 2868 2869 nl_table[NETLINK_USERSOCK].groups = groups; 2870 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); 2871 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2872 nl_table[NETLINK_USERSOCK].registered = 1; 2873 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; 2874 2875 netlink_table_ungrab(); 2876 } 2877 2878 static struct pernet_operations __net_initdata netlink_net_ops = { 2879 .init = netlink_net_init, 2880 .exit = netlink_net_exit, 2881 }; 2882 2883 static inline u32 netlink_hash(const void *data, u32 len, u32 seed) 2884 { 2885 const struct netlink_sock *nlk = data; 2886 struct netlink_compare_arg arg; 2887 2888 netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); 2889 return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); 2890 } 2891 2892 static const struct rhashtable_params netlink_rhashtable_params = { 2893 .head_offset = offsetof(struct netlink_sock, node), 2894 .key_len = netlink_compare_arg_len, 2895 .obj_hashfn = netlink_hash, 2896 .obj_cmpfn = netlink_compare, 2897 .automatic_shrinking = true, 2898 }; 2899 2900 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2901 BTF_ID_LIST(btf_netlink_sock_id) 2902 BTF_ID(struct, netlink_sock) 2903 2904 static const struct bpf_iter_seq_info netlink_seq_info = { 2905 .seq_ops = &netlink_seq_ops, 2906 .init_seq_private = bpf_iter_init_seq_net, 2907 .fini_seq_private = bpf_iter_fini_seq_net, 2908 .seq_priv_size = sizeof(struct nl_seq_iter), 2909 }; 2910 2911 static struct bpf_iter_reg netlink_reg_info = { 2912 .target = "netlink", 2913 .ctx_arg_info_size = 1, 
2914 .ctx_arg_info = { 2915 { offsetof(struct bpf_iter__netlink, sk), 2916 PTR_TO_BTF_ID_OR_NULL }, 2917 }, 2918 .seq_info = &netlink_seq_info, 2919 }; 2920 2921 static int __init bpf_iter_register(void) 2922 { 2923 netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; 2924 return bpf_iter_reg_target(&netlink_reg_info); 2925 } 2926 #endif 2927 2928 static int __init netlink_proto_init(void) 2929 { 2930 int i; 2931 int err = proto_register(&netlink_proto, 0); 2932 2933 if (err != 0) 2934 goto out; 2935 2936 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2937 err = bpf_iter_register(); 2938 if (err) 2939 goto out; 2940 #endif 2941 2942 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb)); 2943 2944 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 2945 if (!nl_table) 2946 goto panic; 2947 2948 for (i = 0; i < MAX_LINKS; i++) { 2949 if (rhashtable_init(&nl_table[i].hash, 2950 &netlink_rhashtable_params) < 0) { 2951 while (--i > 0) 2952 rhashtable_destroy(&nl_table[i].hash); 2953 kfree(nl_table); 2954 goto panic; 2955 } 2956 } 2957 2958 netlink_add_usersock_entry(); 2959 2960 sock_register(&netlink_family_ops); 2961 register_pernet_subsys(&netlink_net_ops); 2962 register_pernet_subsys(&netlink_tap_net_ops); 2963 /* The netlink device handler may be needed early. */ 2964 rtnetlink_init(); 2965 out: 2966 return err; 2967 panic: 2968 panic("netlink_init: Cannot allocate nl_table\n"); 2969 } 2970 2971 core_initcall(netlink_proto_init); 2972