1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NETLINK Kernel-user communication protocol. 4 * 5 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 7 * Patrick McHardy <kaber@trash.net> 8 * 9 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith 10 * added netlink_proto_exit 11 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 12 * use nlk_sk, as sk->protinfo is on a diet 8) 13 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> 14 * - inc module use count of module that owns 15 * the kernel socket in case userspace opens 16 * socket of same protocol 17 * - remove all module support, since netlink is 18 * mandatory if CONFIG_NET=y these days 19 */ 20 21 #include <linux/module.h> 22 23 #include <linux/bpf.h> 24 #include <linux/capability.h> 25 #include <linux/kernel.h> 26 #include <linux/filter.h> 27 #include <linux/init.h> 28 #include <linux/signal.h> 29 #include <linux/sched.h> 30 #include <linux/errno.h> 31 #include <linux/string.h> 32 #include <linux/stat.h> 33 #include <linux/socket.h> 34 #include <linux/un.h> 35 #include <linux/fcntl.h> 36 #include <linux/termios.h> 37 #include <linux/sockios.h> 38 #include <linux/net.h> 39 #include <linux/fs.h> 40 #include <linux/slab.h> 41 #include <linux/uaccess.h> 42 #include <linux/skbuff.h> 43 #include <linux/netdevice.h> 44 #include <linux/rtnetlink.h> 45 #include <linux/proc_fs.h> 46 #include <linux/seq_file.h> 47 #include <linux/notifier.h> 48 #include <linux/security.h> 49 #include <linux/jhash.h> 50 #include <linux/jiffies.h> 51 #include <linux/random.h> 52 #include <linux/bitops.h> 53 #include <linux/mm.h> 54 #include <linux/types.h> 55 #include <linux/audit.h> 56 #include <linux/mutex.h> 57 #include <linux/vmalloc.h> 58 #include <linux/if_arp.h> 59 #include <linux/rhashtable.h> 60 #include <asm/cacheflush.h> 61 #include <linux/hash.h> 62 #include <linux/genetlink.h> 63 #include <linux/net_namespace.h> 64 #include 
<linux/nospec.h>
#include <linux/btf_ids.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
#define CREATE_TRACE_POINTS
#include <trace/events/netlink.h>

#include "af_netlink.h"

/* Per-protocol multicast listener bitmap.
 * masks[i] is rebuilt by netlink_update_listeners() as the OR of groups[i]
 * over every socket on the protocol's mc_list; freed through @rcu.
 */
struct listeners {
	struct rcu_head		rcu;
	unsigned long		masks[];
};

/* state bits */
/* Bit index (not a mask) used with test_bit()/set_bit() on nlk->state. */
#define NETLINK_S_CONGESTED		0x0

/* Non-zero when @sk carries the NETLINK_F_KERNEL_SOCKET flag. */
static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}

/* Table of per-protocol state, indexed by netlink protocol number. */
struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);

/* Waiters in netlink_table_grab() sleep here until nl_table_users drops to 0. */
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

/* lockdep classes for the default per-socket cb_mutex, one per protocol. */
static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];

/* lockdep names matching nlk_cb_mutex_keys[], indexed by protocol number
 * (see __netlink_create()).
 */
static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
	"nlk_cb_mutex-ROUTE",
	"nlk_cb_mutex-1",
	"nlk_cb_mutex-USERSOCK",
	"nlk_cb_mutex-FIREWALL",
	"nlk_cb_mutex-SOCK_DIAG",
	"nlk_cb_mutex-NFLOG",
	"nlk_cb_mutex-XFRM",
	"nlk_cb_mutex-SELINUX",
	"nlk_cb_mutex-ISCSI",
	"nlk_cb_mutex-AUDIT",
	"nlk_cb_mutex-FIB_LOOKUP",
	"nlk_cb_mutex-CONNECTOR",
	"nlk_cb_mutex-NETFILTER",
	"nlk_cb_mutex-IP6_FW",
	"nlk_cb_mutex-DNRTMSG",
	"nlk_cb_mutex-KOBJECT_UEVENT",
	"nlk_cb_mutex-GENERIC",
	"nlk_cb_mutex-17",
	"nlk_cb_mutex-SCSITRANSPORT",
	"nlk_cb_mutex-ECRYPTFS",
	"nlk_cb_mutex-RDMA",
	"nlk_cb_mutex-CRYPTO",
	"nlk_cb_mutex-SMC",
	"nlk_cb_mutex-23",
	"nlk_cb_mutex-24",
	"nlk_cb_mutex-25",
	"nlk_cb_mutex-26",
	"nlk_cb_mutex-27",
	"nlk_cb_mutex-28",
	"nlk_cb_mutex-29",
	"nlk_cb_mutex-30",
	"nlk_cb_mutex-31",
	"nlk_cb_mutex-MAX_LINKS"
};

static int netlink_dump(struct sock *sk);

/* nl_table locking explained:
 * Lookup and traversal are protected with an RCU read-side lock.
Insertion 137 * and removal are protected with per bucket lock while using RCU list 138 * modification primitives and may run in parallel to RCU protected lookups. 139 * Destruction of the Netlink socket may only occur *after* nl_table_lock has 140 * been acquired * either during or after the socket has been removed from 141 * the list and after an RCU grace period. 142 */ 143 DEFINE_RWLOCK(nl_table_lock); 144 EXPORT_SYMBOL_GPL(nl_table_lock); 145 static atomic_t nl_table_users = ATOMIC_INIT(0); 146 147 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 148 149 static BLOCKING_NOTIFIER_HEAD(netlink_chain); 150 151 152 static const struct rhashtable_params netlink_rhashtable_params; 153 154 void do_trace_netlink_extack(const char *msg) 155 { 156 trace_netlink_extack(msg); 157 } 158 EXPORT_SYMBOL(do_trace_netlink_extack); 159 160 static inline u32 netlink_group_mask(u32 group) 161 { 162 if (group > 32) 163 return 0; 164 return group ? 1 << (group - 1) : 0; 165 } 166 167 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, 168 gfp_t gfp_mask) 169 { 170 unsigned int len = skb_end_offset(skb); 171 struct sk_buff *new; 172 173 new = alloc_skb(len, gfp_mask); 174 if (new == NULL) 175 return NULL; 176 177 NETLINK_CB(new).portid = NETLINK_CB(skb).portid; 178 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; 179 NETLINK_CB(new).creds = NETLINK_CB(skb).creds; 180 181 skb_put_data(new, skb->data, len); 182 return new; 183 } 184 185 static unsigned int netlink_tap_net_id; 186 187 struct netlink_tap_net { 188 struct list_head netlink_tap_all; 189 struct mutex netlink_tap_lock; 190 }; 191 192 int netlink_add_tap(struct netlink_tap *nt) 193 { 194 struct net *net = dev_net(nt->dev); 195 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 196 197 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 198 return -EINVAL; 199 200 mutex_lock(&nn->netlink_tap_lock); 201 list_add_rcu(&nt->list, &nn->netlink_tap_all); 
202 mutex_unlock(&nn->netlink_tap_lock); 203 204 __module_get(nt->module); 205 206 return 0; 207 } 208 EXPORT_SYMBOL_GPL(netlink_add_tap); 209 210 static int __netlink_remove_tap(struct netlink_tap *nt) 211 { 212 struct net *net = dev_net(nt->dev); 213 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 214 bool found = false; 215 struct netlink_tap *tmp; 216 217 mutex_lock(&nn->netlink_tap_lock); 218 219 list_for_each_entry(tmp, &nn->netlink_tap_all, list) { 220 if (nt == tmp) { 221 list_del_rcu(&nt->list); 222 found = true; 223 goto out; 224 } 225 } 226 227 pr_warn("__netlink_remove_tap: %p not found\n", nt); 228 out: 229 mutex_unlock(&nn->netlink_tap_lock); 230 231 if (found) 232 module_put(nt->module); 233 234 return found ? 0 : -ENODEV; 235 } 236 237 int netlink_remove_tap(struct netlink_tap *nt) 238 { 239 int ret; 240 241 ret = __netlink_remove_tap(nt); 242 synchronize_net(); 243 244 return ret; 245 } 246 EXPORT_SYMBOL_GPL(netlink_remove_tap); 247 248 static __net_init int netlink_tap_init_net(struct net *net) 249 { 250 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 251 252 INIT_LIST_HEAD(&nn->netlink_tap_all); 253 mutex_init(&nn->netlink_tap_lock); 254 return 0; 255 } 256 257 static struct pernet_operations netlink_tap_net_ops = { 258 .init = netlink_tap_init_net, 259 .id = &netlink_tap_net_id, 260 .size = sizeof(struct netlink_tap_net), 261 }; 262 263 static bool netlink_filter_tap(const struct sk_buff *skb) 264 { 265 struct sock *sk = skb->sk; 266 267 /* We take the more conservative approach and 268 * whitelist socket protocols that may pass. 
269 */ 270 switch (sk->sk_protocol) { 271 case NETLINK_ROUTE: 272 case NETLINK_USERSOCK: 273 case NETLINK_SOCK_DIAG: 274 case NETLINK_NFLOG: 275 case NETLINK_XFRM: 276 case NETLINK_FIB_LOOKUP: 277 case NETLINK_NETFILTER: 278 case NETLINK_GENERIC: 279 return true; 280 } 281 282 return false; 283 } 284 285 static int __netlink_deliver_tap_skb(struct sk_buff *skb, 286 struct net_device *dev) 287 { 288 struct sk_buff *nskb; 289 struct sock *sk = skb->sk; 290 int ret = -ENOMEM; 291 292 if (!net_eq(dev_net(dev), sock_net(sk))) 293 return 0; 294 295 dev_hold(dev); 296 297 if (is_vmalloc_addr(skb->head)) 298 nskb = netlink_to_full_skb(skb, GFP_ATOMIC); 299 else 300 nskb = skb_clone(skb, GFP_ATOMIC); 301 if (nskb) { 302 nskb->dev = dev; 303 nskb->protocol = htons((u16) sk->sk_protocol); 304 nskb->pkt_type = netlink_is_kernel(sk) ? 305 PACKET_KERNEL : PACKET_USER; 306 skb_reset_network_header(nskb); 307 ret = dev_queue_xmit(nskb); 308 if (unlikely(ret > 0)) 309 ret = net_xmit_errno(ret); 310 } 311 312 dev_put(dev); 313 return ret; 314 } 315 316 static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn) 317 { 318 int ret; 319 struct netlink_tap *tmp; 320 321 if (!netlink_filter_tap(skb)) 322 return; 323 324 list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) { 325 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 326 if (unlikely(ret)) 327 break; 328 } 329 } 330 331 static void netlink_deliver_tap(struct net *net, struct sk_buff *skb) 332 { 333 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 334 335 rcu_read_lock(); 336 337 if (unlikely(!list_empty(&nn->netlink_tap_all))) 338 __netlink_deliver_tap(skb, nn); 339 340 rcu_read_unlock(); 341 } 342 343 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, 344 struct sk_buff *skb) 345 { 346 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) 347 netlink_deliver_tap(sock_net(dst), skb); 348 } 349 350 static void netlink_overrun(struct sock *sk) 351 { 352 struct 
netlink_sock *nlk = nlk_sk(sk); 353 354 if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { 355 if (!test_and_set_bit(NETLINK_S_CONGESTED, 356 &nlk_sk(sk)->state)) { 357 sk->sk_err = ENOBUFS; 358 sk_error_report(sk); 359 } 360 } 361 atomic_inc(&sk->sk_drops); 362 } 363 364 static void netlink_rcv_wake(struct sock *sk) 365 { 366 struct netlink_sock *nlk = nlk_sk(sk); 367 368 if (skb_queue_empty_lockless(&sk->sk_receive_queue)) 369 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 370 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) 371 wake_up_interruptible(&nlk->wait); 372 } 373 374 static void netlink_skb_destructor(struct sk_buff *skb) 375 { 376 if (is_vmalloc_addr(skb->head)) { 377 if (!skb->cloned || 378 !atomic_dec_return(&(skb_shinfo(skb)->dataref))) 379 vfree(skb->head); 380 381 skb->head = NULL; 382 } 383 if (skb->sk != NULL) 384 sock_rfree(skb); 385 } 386 387 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 388 { 389 WARN_ON(skb->sk != NULL); 390 skb->sk = sk; 391 skb->destructor = netlink_skb_destructor; 392 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 393 sk_mem_charge(sk, skb->truesize); 394 } 395 396 static void netlink_sock_destruct(struct sock *sk) 397 { 398 struct netlink_sock *nlk = nlk_sk(sk); 399 400 if (nlk->cb_running) { 401 if (nlk->cb.done) 402 nlk->cb.done(&nlk->cb); 403 module_put(nlk->cb.module); 404 kfree_skb(nlk->cb.skb); 405 } 406 407 skb_queue_purge(&sk->sk_receive_queue); 408 409 if (!sock_flag(sk, SOCK_DEAD)) { 410 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 411 return; 412 } 413 414 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 415 WARN_ON(refcount_read(&sk->sk_wmem_alloc)); 416 WARN_ON(nlk_sk(sk)->groups); 417 } 418 419 static void netlink_sock_destruct_work(struct work_struct *work) 420 { 421 struct netlink_sock *nlk = container_of(work, struct netlink_sock, 422 work); 423 424 sk_free(&nlk->sk); 425 } 426 427 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 428 * SMP. 
Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

/* Take nl_table_lock for writing and wait until no netlink_lock_table()
 * holder remains (nl_table_users == 0).  May sleep; returns with the write
 * lock held and interrupts disabled.  Pair with netlink_table_ungrab().
 */
void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	might_sleep();

	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			/* set state before re-checking so a wake-up between
			 * the check and schedule() is not lost
			 */
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			/* drop the lock while sleeping, retake it after */
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

/* Release the write lock taken by netlink_table_grab() and wake the next
 * waiter.
 */
void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

/* Register as a table user: bumps nl_table_users so that
 * netlink_table_grab() waits until netlink_unlock_table() is called.
 * The rwlock itself is only held for the duration of the increment.
 */
static inline void
netlink_lock_table(void)
{
	unsigned long flags;

	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock_irqsave(&nl_table_lock, flags);
	atomic_inc(&nl_table_users);
	read_unlock_irqrestore(&nl_table_lock, flags);
}

/* Drop a table-user reference; the last user wakes a pending grabber. */
static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}

/* Hash-table lookup key: network namespace plus port id. */
struct netlink_compare_arg
{
	possible_net_t pnet;
	u32 portid;
};

/* Doing sizeof directly may yield 4 extra bytes on 64-bit.
 */
#define netlink_compare_arg_len \
	(offsetof(struct netlink_compare_arg, portid) + sizeof(u32))

/* Compare the netlink_sock @ptr against the lookup key in @arg.
 * Returns 0 when both port id and network namespace match, non-zero
 * otherwise.
 */
static inline int netlink_compare(struct rhashtable_compare_arg *arg,
				  const void *ptr)
{
	const struct netlink_compare_arg *x = arg->key;
	const struct netlink_sock *nlk = ptr;

	return nlk->portid != x->portid ||
	       !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
}

/* Fill a lookup key.  The whole struct is zeroed first, presumably so that
 * padding bytes do not perturb hashing of the key.
 */
static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
				     struct net *net, u32 portid)
{
	memset(arg, 0, sizeof(*arg));
	write_pnet(&arg->pnet, net);
	arg->portid = portid;
}

/* Look up the socket bound to (@net, @portid) in @table.
 * Returns the entry or NULL; takes no reference itself (the callers in this
 * file hold rcu_read_lock() around it).
 */
static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
				     struct net *net)
{
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, net, portid);
	return rhashtable_lookup_fast(&table->hash, &arg,
				      netlink_rhashtable_params);
}

/* Insert @sk into @table under its current portid/namespace.
 * Returns 0 or the rhashtable error (e.g. -EEXIST on a duplicate key).
 */
static int __netlink_insert(struct netlink_table *table, struct sock *sk)
{
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
	return rhashtable_lookup_insert_key(&table->hash, &arg,
					    &nlk_sk(sk)->node,
					    netlink_rhashtable_params);
}

/* Find the socket bound to @portid for @protocol in @net.
 * Returns the socket with a reference held (caller must sock_put()), or
 * NULL.
 */
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
	struct netlink_table *table = &nl_table[protocol];
	struct sock *sk;

	rcu_read_lock();
	sk = __netlink_lookup(table, portid, net);
	if (sk)
		sock_hold(sk);
	rcu_read_unlock();

	return sk;
}

static const struct proto_ops netlink_ops;

/* Recompute the protocol's listener bitmap as the OR of the group bitmaps
 * of every socket on its mc_list.  @sk selects the protocol and is then
 * reused as the list cursor.
 */
static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	unsigned long mask;
	unsigned int i;
	struct listeners *listeners;

	listeners = nl_deref_protected(tbl->listeners);
	if (!listeners)
		return;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, &tbl->mc_list) {
			/* sockets may have fewer group words than the table */
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		listeners->masks[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}

/* Bind @sk to @portid and hash it into the protocol table.
 * Returns 0 on success (also when already bound to the same portid),
 * -EBUSY when already bound to a different portid, -EADDRINUSE when the
 * portid is taken, -EOVERFLOW when the hash table reports -EBUSY, or another
 * hash-table error.
 */
static int netlink_insert(struct sock *sk, u32 portid)
{
	struct netlink_table *table = &nl_table[sk->sk_protocol];
	int err;

	lock_sock(sk);

	/* result returned if the socket turns out to be bound already */
	err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
	if (nlk_sk(sk)->bound)
		goto err;

	/* portid can be read locklessly from netlink_getname(). */
	WRITE_ONCE(nlk_sk(sk)->portid, portid);

	/* reference owned by the hash table; dropped in netlink_remove() */
	sock_hold(sk);

	err = __netlink_insert(table, sk);
	if (err) {
		/* In case the hashtable backend returns with -EBUSY
		 * from here, it must not escape to the caller.
		 */
		if (unlikely(err == -EBUSY))
			err = -EOVERFLOW;
		if (err == -EEXIST)
			err = -EADDRINUSE;
		sock_put(sk);
		goto err;
	}

	/* We need to ensure that the socket is hashed and visible. */
	smp_wmb();
	/* Paired with lockless reads from netlink_bind(),
	 * netlink_connect() and netlink_sendmsg().
	 */
	WRITE_ONCE(nlk_sk(sk)->bound, portid);

err:
	release_sock(sk);
	return err;
}

/* Unhash @sk (dropping the hash table's reference if it was hashed) and,
 * with the table grabbed, remove it from the multicast list and refresh the
 * listener bitmap.  For NETLINK_GENERIC it also bumps
 * genl_sk_destructing_cnt, which netlink_release() later decrements.
 */
static void netlink_remove(struct sock *sk)
{
	struct netlink_table *table;

	table = &nl_table[sk->sk_protocol];
	if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
				    netlink_rhashtable_params)) {
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}

	netlink_table_grab();
	if (nlk_sk(sk)->subscriptions) {
		__sk_del_bind_node(sk);
		netlink_update_listeners(sk);
	}
	if (sk->sk_protocol == NETLINK_GENERIC)
		atomic_inc(&genl_sk_destructing_cnt);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

/* Allocate and initialise the sock for @sock.
 * @cb_mutex: protocol-supplied mutex, or NULL to use (and initialise) the
 *            socket's own cb_def_mutex with a per-protocol lockdep class.
 * Returns 0 or -ENOMEM.
 */
static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol,
			    int kern)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex) {
		nlk->cb_mutex = cb_mutex;
	} else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
		lockdep_set_class_and_name(nlk->cb_mutex,
					   nlk_cb_mutex_keys + protocol,
					   nlk_cb_mutex_key_strings[protocol]);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}

/* Socket-creation entry point for the netlink family.
 * Validates type and protocol, loads the protocol module on demand, pins
 * it, and copies the protocol's bind/unbind/release hooks into the new
 * socket.
 * Returns 0, -ESOCKTNOSUPPORT, -EPROTONOSUPPORT or the __netlink_create()
 * error.
 */
static int netlink_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int (*bind)(struct net *net, int group);
	void (*unbind)(struct net *net, int group);
	void (*release)(struct sock *sock, unsigned long *groups);
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;
	/* clamp the index under speculation before using it on nl_table[] */
	protocol = array_index_nospec(protocol, MAX_LINKS);

	netlink_lock_table();
#ifdef CONFIG_MODULES
	if (!nl_table[protocol].registered) {
		/* request_module() may sleep: drop the table while loading */
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	else
		err = -EPROTONOSUPPORT;
	/* snapshot the hooks while the table is still locked */
	cb_mutex = nl_table[protocol].cb_mutex;
	bind = nl_table[protocol].bind;
	unbind = nl_table[protocol].unbind;
	release = nl_table[protocol].release;
	netlink_unlock_table();

	if (err < 0)
		goto out;

	err = __netlink_create(net, sock, cb_mutex, protocol, kern);
	if (err < 0)
		goto out_module;

	sock_prot_inuse_add(net, &netlink_proto, 1);

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
	nlk->netlink_bind = bind;
	nlk->netlink_unbind = unbind;
	nlk->netlink_release = release;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}

/* RCU callback queued by netlink_release(): frees the group bitmap and
 * drops a socket reference.  If that was the last one, the socket is freed
 * - via a work item when a dump "done" callback still has to run,
 * directly otherwise.
 */
static void deferred_put_nlk_sk(struct rcu_head *head)
{
	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
	struct sock *sk = &nlk->sk;

	kfree(nlk->groups);
	nlk->groups = NULL;

	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	if (nlk->cb_running && nlk->cb.done) {
		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
		schedule_work(&nlk->work);
		return;
	}

	sk_free(sk);
}

/* Release handler for a netlink socket: unhashes and orphans the socket,
 * runs the protocol's release/unbind hooks, notifies netlink_chain
 * listeners with NETLINK_URELEASE for bound sockets, unregisters the
 * protocol when the last kernel socket goes away, and defers the final put
 * to an RCU callback.  Always returns 0.
 */
static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */
	if (nlk->netlink_release)
		nlk->netlink_release(sk, nlk->groups);

	/* must not acquire netlink_table_lock in any way again before unbind
	 * and notifying genetlink is done as otherwise it might deadlock
	 */
	if (nlk->netlink_unbind) {
		int i;

		/* bit i of the bitmap corresponds to 1-based group i + 1 */
		for (i = 0; i < nlk->ngroups; i++)
			if (test_bit(i, nlk->groups))
				nlk->netlink_unbind(sock_net(sk), i + 1);
	}
	/* balances the increment done in netlink_remove() */
	if (sk->sk_protocol == NETLINK_GENERIC &&
	    atomic_dec_return(&genl_sk_destructing_cnt) == 0)
		wake_up(&genl_sk_destructing_waitq);

	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);

	skb_queue_purge(&sk->sk_write_queue);

	if (nlk->portid && nlk->bound) {
		struct netlink_notify n = {
						.net = sock_net(sk),
						.protocol = sk->sk_protocol,
						.portid = nlk->portid,
					  };
		blocking_notifier_call_chain(&netlink_chain,
				NETLINK_URELEASE, &n);
	}

	module_put(nlk->module);

	if (netlink_is_kernel(sk)) {
		netlink_table_grab();
		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
		/* last kernel socket of this protocol: unregister it */
		if (--nl_table[sk->sk_protocol].registered == 0) {
			struct listeners *old;

			old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
			RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
			kfree_rcu(old, rcu);
			nl_table[sk->sk_protocol].module = NULL;
			nl_table[sk->sk_protocol].bind = NULL;
			nl_table[sk->sk_protocol].unbind = NULL;
			nl_table[sk->sk_protocol].flags = 0;
			nl_table[sk->sk_protocol].registered = 0;
		}
		netlink_table_ungrab();
	}

	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);

	/* Because struct net might disappear soon, do not keep a pointer. */
	if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
		/* Because of deferred_put_nlk_sk and use of work queue,
		 * it is possible  netns will be freed before this socket.
		 */
		sock_net_set(sk, &init_net);
		__netns_tracker_alloc(&init_net, &sk->ns_tracker,
				      false, GFP_KERNEL);
	}
	call_rcu(&nlk->rcu, deferred_put_nlk_sk);
	return 0;
}

/* Pick an unused port id for @sock and bind to it.  The first candidate is
 * the caller's thread-group id; on collision, negative values at or below
 * -4097 are tried, starting from a random point and counting down.
 * Returns 0 on success (including when another thread bound the socket
 * concurrently) or the netlink_insert() error.
 */
static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_table *table = &nl_table[sk->sk_protocol];
	s32 portid = task_tgid_vnr(current);
	int err;
	s32 rover = -4096;
	bool ok;

retry:
	cond_resched();
	rcu_read_lock();
	ok = !__netlink_lookup(table, portid, net);
	rcu_read_unlock();
	if (!ok) {
		/* Bind collision, search negative portid values. */
		if (rover == -4096)
			/* rover will be in range [S32_MIN, -4097] */
			rover = S32_MIN + get_random_u32_below(-4096 - S32_MIN);
		else if (rover >= -4096)
			/* counted down past S32_MIN and wrapped: restart */
			rover = -4097;
		portid = rover--;
		goto retry;
	}

	err = netlink_insert(sk, portid);
	/* someone took the portid between lookup and insert: try another */
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine.  */
	if (err == -EBUSY)
		err = 0;

	return err;
}

/**
 * __netlink_ns_capable - General netlink message capability test
 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had when the netlink socket was created and the sender of the
 * message has the capability @cap in the user namespace @user_ns.
883 */ 884 bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, 885 struct user_namespace *user_ns, int cap) 886 { 887 return ((nsp->flags & NETLINK_SKB_DST) || 888 file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && 889 ns_capable(user_ns, cap); 890 } 891 EXPORT_SYMBOL(__netlink_ns_capable); 892 893 /** 894 * netlink_ns_capable - General netlink message capability test 895 * @skb: socket buffer holding a netlink command from userspace 896 * @user_ns: The user namespace of the capability to use 897 * @cap: The capability to use 898 * 899 * Test to see if the opener of the socket we received the message 900 * from had when the netlink socket was created and the sender of the 901 * message has the capability @cap in the user namespace @user_ns. 902 */ 903 bool netlink_ns_capable(const struct sk_buff *skb, 904 struct user_namespace *user_ns, int cap) 905 { 906 return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); 907 } 908 EXPORT_SYMBOL(netlink_ns_capable); 909 910 /** 911 * netlink_capable - Netlink global message capability test 912 * @skb: socket buffer holding a netlink command from userspace 913 * @cap: The capability to use 914 * 915 * Test to see if the opener of the socket we received the message 916 * from had when the netlink socket was created and the sender of the 917 * message has the capability @cap in all user namespaces. 
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
	/* checked against the initial user namespace */
	return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message
 * from had when the netlink socket was created and the sender of the
 * message has the capability @cap over the network namespace of
 * the socket we received the message from.
 */
bool netlink_net_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
}
EXPORT_SYMBOL(netlink_net_capable);

/* Non-zero when the socket's protocol has @flag set in its table flags, or
 * when the caller has CAP_NET_ADMIN in the user namespace owning the
 * socket's network namespace.
 */
static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
		ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}

/* Record the new subscription count of @sk, adding it to or removing it
 * from the protocol's multicast list when the count changes between zero
 * and non-zero.
 */
static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

/* Grow the socket's group bitmap to the protocol's current group count,
 * zero-filling the newly added part.
 * Returns 0 (also when no growth is needed), -ENOENT if the protocol is no
 * longer registered, or -ENOMEM.
 */
static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	/* GFP_ATOMIC: the table write lock is held here */
	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
 out_unlock:
	netlink_table_ungrab();
	return err;
}

/* Roll back protocol bind hooks: call netlink_unbind for every group whose
 * bit is set in @groups among the first @group bit positions.
 */
static void netlink_undo_bind(int group, long unsigned int groups,
			      struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int undo;

	if (!nlk->netlink_unbind)
		return;

	for (undo = 0; undo < group; undo++)
		if (test_bit(undo, &groups))
			nlk->netlink_unbind(sock_net(sk), undo + 1);
}

/* bind() handler: binds the socket to nl_pid (or autobinds when nl_pid is
 * 0) and subscribes it to the multicast groups in nl_groups.
 * Returns 0, -EINVAL for a short/mis-typed address or a portid differing
 * from the one already bound, -EPERM when multicast listening is not
 * allowed, or an error from group reallocation, the protocol's bind hook or
 * port insertion.
 */
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err = 0;
	unsigned long groups;
	bool bound;

	if (addr_len < sizeof(struct sockaddr_nl))
		return -EINVAL;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;
	groups = nladdr->nl_groups;

	/* Only superuser is allowed to listen multicasts */
	if (groups) {
		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	/* ignore requested groups the socket's bitmap cannot hold */
	if (nlk->ngroups < BITS_PER_LONG)
		groups &= (1UL << nlk->ngroups) - 1;

	/* Paired with WRITE_ONCE() in netlink_insert() */
	bound = READ_ONCE(nlk->bound);
	if (bound) {
		/* Ensure nlk->portid is up-to-date. */
		smp_rmb();

		if (nladdr->nl_pid != nlk->portid)
			return -EINVAL;
	}

	if (nlk->netlink_bind && groups) {
		int group;

		/* nl_groups is a u32, so cap the maximum groups we can bind */
		for (group = 0; group < BITS_PER_TYPE(u32); group++) {
			if (!test_bit(group, &groups))
				continue;
			err = nlk->netlink_bind(net, group + 1);
			if (!err)
				continue;
			/* undo the groups bound before the failing one */
			netlink_undo_bind(group, groups, sk);
			return err;
		}
	}

	/* No need for barriers here as we return to user-space without
	 * using any of the bound attributes.
	 */
	netlink_lock_table();
	if (!bound) {
		err = nladdr->nl_pid ?
			netlink_insert(sk, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err) {
			netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
			goto unlock;
		}
	}

	/* nothing requested and nothing subscribed in the first 32 groups */
	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		goto unlock;
	netlink_unlock_table();

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(groups) -
					 hweight32(nlk->groups[0]));
	/* replace only the low 32 bits of the first bitmap word */
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;

unlock:
	netlink_unlock_table();
	return err;
}

/* connect() handler: records the default destination (portid and group) of
 * the socket, autobinding it first if needed.  AF_UNSPEC resets the socket
 * to unconnected.
 * Returns 0, -EINVAL for a malformed address, -EPERM when sending to the
 * requested destination is not allowed, or the autobind error.
 */
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (alen < sizeof(addr->sa_family))
		return -EINVAL;

	if (addr->sa_family == AF_UNSPEC) {
		/* paired with READ_ONCE() in netlink_getsockbyportid() */
		WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
		/* dst_portid and dst_group can be read locklessly */
		WRITE_ONCE(nlk->dst_portid, 0);
		WRITE_ONCE(nlk->dst_group, 0);
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	if (alen < sizeof(struct sockaddr_nl))
		return -EINVAL;

	if ((nladdr->nl_groups || nladdr->nl_pid) &&
	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
		return -EPERM;

	/* No need for barriers here as we return to user-space without
	 * using any of the bound attributes.
	 * Paired with WRITE_ONCE() in netlink_insert().
	 */
	if (!READ_ONCE(nlk->bound))
		err = netlink_autobind(sock);

	if (err == 0) {
		/* paired with READ_ONCE() in netlink_getsockbyportid() */
		WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
		/* dst_portid and dst_group can be read locklessly */
		WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
		/* ffs(): lowest set group bit as a 1-based group number */
		WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
	}

	return err;
}

/* getsockname()/getpeername() handler: fills @addr with the peer
 * (@peer != 0) or local address of the socket.
 * Returns sizeof(struct sockaddr_nl).
 */
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;

	if (peer) {
		/* Paired with WRITE_ONCE() in netlink_connect() */
		nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
		nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
	} else {
		/* Paired with WRITE_ONCE() in netlink_insert() */
		nladdr->nl_pid = READ_ONCE(nlk->portid);
		/* hold off table grabbers (which may swap nlk->groups) */
		netlink_lock_table();
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
		netlink_unlock_table();
	}
	return sizeof(*nladdr);
}

/* ioctl handler: netlink implements no ioctls itself; always returns
 * -ENOIOCTLCMD.
 */
static int netlink_ioctl(struct socket *sock, unsigned int cmd,
			 unsigned long arg)
{
	/* try to hand this ioctl down to the NIC drivers.
	 */
	return -ENOIOCTLCMD;
}

/* Look up the destination socket for a unicast from @ssk to @portid.
 * Returns the socket with a reference held, or ERR_PTR(-ECONNREFUSED) when
 * no such socket exists or when it is connected to a peer other than @ssk.
 */
static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	/* dst_portid and sk_state can be changed in netlink_connect() */
	if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
	    READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

/* Resolve an open file to its netlink socket.
 * Returns the sock with a reference held, ERR_PTR(-ENOTSOCK) if @filp is
 * not a socket, or ERR_PTR(-EINVAL) if it is not an AF_NETLINK socket.
 */
struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

/* Allocate an skb with room for @size bytes.  Small or broadcast messages
 * use the regular allocator; large unicast ones get a vmalloc-backed buffer
 * that netlink_skb_destructor() releases.
 * Returns the skb or NULL on allocation failure.
 */
static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
					       int broadcast)
{
	struct sk_buff *skb;
	void *data;

	if (size <= NLMSG_GOODSIZE || broadcast)
		return alloc_skb(size, GFP_KERNEL);

	/* leave room for the skb_shared_info trailer behind the data */
	size = SKB_DATA_ALIGN(size) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	data = vmalloc(size);
	if (data == NULL)
		return NULL;

	skb = __build_skb(data, size);
	if (skb == NULL)
		vfree(data);
	else
		skb->destructor = netlink_skb_destructor;

	return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not send to the destination, just all
 * all error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error.
skb freed, reference to sock dropped.
 *   0: continue
 *   1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
		      long *timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
		DECLARE_WAITQUEUE(wait, current);

		/* Non-blocking (or timeout exhausted): give up right away. */
		if (!*timeo) {
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		/* Re-check after queueing ourselves so a wakeup is not missed. */
		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			*timeo = schedule_timeout(*timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(*timeo);
		}
		return 1;
	}
	netlink_skb_set_owner_r(skb, sk);
	return 0;
}

/*
 * Hand @skb to the taps, append it to @sk's receive queue and call the
 * socket's sk_data_ready() hook.
 * Returns the skb length, sampled before the skb is queued.
 */
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;

	netlink_deliver_tap(sock_net(sk), skb);

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk);
	return len;
}

/*
 * Queue @skb on @sk via __netlink_sendskb() and drop one reference on @sk.
 * Returns the queued skb length.
 */
int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = __netlink_sendskb(sk, skb);

	sock_put(sk);
	return len;
}

/* Free @skb and drop one reference on @sk. */
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}

/*
 * Best-effort attempt to give back unused tailroom of @skb.
 *
 * The skb is returned untouched when its head is a vmalloc address or when
 * the slack (end - tail) is less than half of truesize.  A shared skb is
 * cloned first; if the clone fails the original is returned as is.  The
 * result of pskb_expand_head() is deliberately not checked.
 */
static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
	int delta;

	WARN_ON(skb->sk != NULL);
	delta = skb->end - skb->tail;
	if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
		return skb;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		consume_skb(skb);
		skb = nskb;
	}

	pskb_expand_head(skb, 0, -delta,
			 (allocation & ~__GFP_DIRECT_RECLAIM) |
			 __GFP_NOWARN | __GFP_NORETRY);
	return skb;
}

/*
 * Deliver @skb synchronously to the kernel socket @sk by calling its
 * netlink_rcv() callback.
 *
 * Returns skb->len when a callback is installed, -ECONNREFUSED otherwise.
 * The skb is released and the reference on @sk is dropped in both cases.
 */
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
				  struct sock *ssk)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		netlink_skb_set_owner_r(skb, sk);
		NETLINK_CB(skb).sk = ssk;
		netlink_deliver_tap_kernel(sk, ssk, skb);
		nlk->netlink_rcv(skb);
		consume_skb(skb);
	} else {
		kfree_skb(skb);
	}
	sock_put(sk);
	return ret;
}

/*
 * Send @skb from @ssk to the socket bound to @portid.
 *
 * The skb is consumed on every path.  Returns the number of bytes queued
 * (or handed to the kernel receiver), or a negative errno.  When the
 * destination's socket filter rejects the skb, the skb is dropped and its
 * length is still returned.
 */
int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 portid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbyportid(ssk, portid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb, ssk);

	if (sk_filter(sk, skb)) {
		err = skb->len;
		kfree_skb(skb);
		sock_put(sk);
		return err;
	}

	/* 1 means the reference was dropped while waiting: look up again. */
	err = netlink_attachskb(sk, skb, &timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

/*
 * Report whether bit (@group - 1) is set in the listeners mask of @sk's
 * protocol.  @sk must be a kernel socket (BUG otherwise).  Returns 0 when
 * no listeners table exists or @group is outside the protocol's range.
 */
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	struct listeners *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	/* group is unsigned: group == 0 wraps and fails the range check */
	if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners->masks);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

/*
 * Return true when the socket recorded in NETLINK_CB(@skb).sk has the
 * NETLINK_F_STRICT_CHK flag set.
 */
bool netlink_strict_get_check(struct sk_buff *skb)
{
	const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);

	return nlk->flags & NETLINK_F_STRICT_CHK;
}
EXPORT_SYMBOL_GPL(netlink_strict_get_check);

/*
 * Queue a broadcast @skb on @sk if the receiver has room.
 *
 * Returns -1 when @sk is over its receive buffer or marked congested (the
 * skb is not queued), otherwise 1 if the receive queue is now more than
 * half full and 0 if it is not.
 */
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
		netlink_skb_set_owner_r(skb, sk);
		__netlink_sendskb(sk, skb);
		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
	}
	return -1;
}

/* State shared across the do_one_broadcast() calls of one broadcast. */
struct netlink_broadcast_data {
	struct sock *exclude_sk;	/* sender, never delivered to */
	struct net *net;		/* namespace of the sender */
	u32 portid;			/* portid to skip */
	u32 group;			/* destination group (1-based) */
	int failure;			/* a clone failed earlier */
	int delivery_failure;		/* a BROADCAST_SEND_ERROR listener missed it */
	int congested;			/* OR of netlink_broadcast_deliver() results */
	int delivered;			/* at least one socket got the skb */
	gfp_t allocation;		/* gfp flags for skb_clone() */
	struct sk_buff *skb, *skb2;	/* original and per-receiver copy */
	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
	void *tx_data;			/* opaque argument for tx_filter */
};

/*
 * Try to deliver the broadcast described by @p to the single socket @sk.
 * Sockets that are excluded, not subscribed to the group, or in a foreign
 * namespace without the required opt-in and capability are skipped.
 */
static void do_one_broadcast(struct sock *sk,
			     struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		return;

	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		return;

	if (!net_eq(sock_net(sk), p->net)) {
		if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
			return;

		if (!peernet_has_id(sock_net(sk), p->net))
			return;

		if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
				     CAP_NET_BROADCAST))
			return;
	}

	if (p->failure) {
		netlink_overrun(sk);
		return;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
		goto out;
	}

	if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
		goto out;
	}

	if (sk_filter(sk, p->skb2)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
		goto out;
	}
	NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
	if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
		NETLINK_CB(p->skb2).nsid_is_set = true;
	val = netlink_broadcast_deliver(sk, p->skb2);
	if (val < 0) {
		/* skb2 is kept so the next receiver can reuse it */
		netlink_overrun(sk);
		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
out:
	sock_put(sk);
}

/*
 * Broadcast @skb from @ssk to every socket on the protocol's multicast
 * list that is subscribed to @group, except @ssk itself and the socket
 * bound to @portid.  @filter, when non-NULL, is called per receiver with
 * @filter_data and a non-zero result suppresses delivery to that receiver.
 *
 * The caller's reference on @skb is consumed.
 * Returns 0 if at least one socket received the message, -ENOBUFS if a
 * listener with NETLINK_F_BROADCAST_SEND_ERROR missed it, and -ESRCH if
 * nobody received it.
 */
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
			       u32 portid,
			       u32 group, gfp_t allocation,
			       int (*filter)(struct sock *dsk,
					     struct sk_buff *skb, void *data),
			       void *filter_data)
{
	struct net *net = sock_net(ssk);
	struct netlink_broadcast_data info;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.portid = portid;
	info.group = group;
	info.failure = 0;
	info.delivery_failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;
	info.tx_filter = filter;
	info.tx_data = filter_data;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	consume_skb(skb);

	netlink_unlock_table();

	if (info.delivery_failure) {
		kfree_skb(info.skb2);
		return -ENOBUFS;
	}
	consume_skb(info.skb2);

	if (info.delivered) {
		if (info.congested && gfpflags_allow_blocking(allocation))
			yield();
		return 0;
	}
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

/* netlink_broadcast_filtered() without a per-receiver filter. */
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
		      u32 group, gfp_t allocation)
{
	return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
					  NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);

/* Arguments passed to do_one_set_err() for each multicast listener. */
struct netlink_set_err_data {
	struct sock *exclude_sk;	/* socket to skip; also selects the netns */
	u32 portid;			/* portid to skip */
	u32 group;			/* group (1-based) whose members get the error */
	int code;			/* positive errno stored in sk_err */
};

/*
 * Report error @p->code on @sk if it is a matching listener.
 *
 * Returns 1 when the error is ENOBUFS and the socket opted out of it via
 * NETLINK_F_RECV_NO_ENOBUFS (nothing is reported then), 0 otherwise.
 */
static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int ret = 0;

	if (sk == p->exclude_sk)
		goto out;

	if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
		goto out;

	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
		ret = 1;
		goto out;
	}

	/*
	 * The caller only holds the shared (read) side of nl_table_lock, so
	 * other CPUs may store to or load from sk_err concurrently; mark the
	 * store to avoid a torn or compiler-mangled write.
	 */
	WRITE_ONCE(sk->sk_err, p->code);
	sk_error_report(sk);
out:
	return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
NETLINK_NO_ENOBUFS socket option. 1627 */ 1628 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 1629 { 1630 struct netlink_set_err_data info; 1631 unsigned long flags; 1632 struct sock *sk; 1633 int ret = 0; 1634 1635 info.exclude_sk = ssk; 1636 info.portid = portid; 1637 info.group = group; 1638 /* sk->sk_err wants a positive error value */ 1639 info.code = -code; 1640 1641 read_lock_irqsave(&nl_table_lock, flags); 1642 1643 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1644 ret += do_one_set_err(sk, &info); 1645 1646 read_unlock_irqrestore(&nl_table_lock, flags); 1647 return ret; 1648 } 1649 EXPORT_SYMBOL(netlink_set_err); 1650 1651 /* must be called with netlink table grabbed */ 1652 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1653 unsigned int group, 1654 int is_new) 1655 { 1656 int old, new = !!is_new, subscriptions; 1657 1658 old = test_bit(group - 1, nlk->groups); 1659 subscriptions = nlk->subscriptions - old + new; 1660 __assign_bit(group - 1, nlk->groups, new); 1661 netlink_update_subscriptions(&nlk->sk, subscriptions); 1662 netlink_update_listeners(&nlk->sk); 1663 } 1664 1665 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1666 sockptr_t optval, unsigned int optlen) 1667 { 1668 struct sock *sk = sock->sk; 1669 struct netlink_sock *nlk = nlk_sk(sk); 1670 unsigned int val = 0; 1671 int err; 1672 1673 if (level != SOL_NETLINK) 1674 return -ENOPROTOOPT; 1675 1676 if (optlen >= sizeof(int) && 1677 copy_from_sockptr(&val, optval, sizeof(val))) 1678 return -EFAULT; 1679 1680 switch (optname) { 1681 case NETLINK_PKTINFO: 1682 if (val) 1683 nlk->flags |= NETLINK_F_RECV_PKTINFO; 1684 else 1685 nlk->flags &= ~NETLINK_F_RECV_PKTINFO; 1686 err = 0; 1687 break; 1688 case NETLINK_ADD_MEMBERSHIP: 1689 case NETLINK_DROP_MEMBERSHIP: { 1690 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 1691 return -EPERM; 1692 err = netlink_realloc_groups(sk); 1693 if (err) 1694 return err; 1695 if (!val 
|| val - 1 >= nlk->ngroups) 1696 return -EINVAL; 1697 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { 1698 err = nlk->netlink_bind(sock_net(sk), val); 1699 if (err) 1700 return err; 1701 } 1702 netlink_table_grab(); 1703 netlink_update_socket_mc(nlk, val, 1704 optname == NETLINK_ADD_MEMBERSHIP); 1705 netlink_table_ungrab(); 1706 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) 1707 nlk->netlink_unbind(sock_net(sk), val); 1708 1709 err = 0; 1710 break; 1711 } 1712 case NETLINK_BROADCAST_ERROR: 1713 if (val) 1714 nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR; 1715 else 1716 nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR; 1717 err = 0; 1718 break; 1719 case NETLINK_NO_ENOBUFS: 1720 if (val) { 1721 nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS; 1722 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 1723 wake_up_interruptible(&nlk->wait); 1724 } else { 1725 nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS; 1726 } 1727 err = 0; 1728 break; 1729 case NETLINK_LISTEN_ALL_NSID: 1730 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) 1731 return -EPERM; 1732 1733 if (val) 1734 nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; 1735 else 1736 nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; 1737 err = 0; 1738 break; 1739 case NETLINK_CAP_ACK: 1740 if (val) 1741 nlk->flags |= NETLINK_F_CAP_ACK; 1742 else 1743 nlk->flags &= ~NETLINK_F_CAP_ACK; 1744 err = 0; 1745 break; 1746 case NETLINK_EXT_ACK: 1747 if (val) 1748 nlk->flags |= NETLINK_F_EXT_ACK; 1749 else 1750 nlk->flags &= ~NETLINK_F_EXT_ACK; 1751 err = 0; 1752 break; 1753 case NETLINK_GET_STRICT_CHK: 1754 if (val) 1755 nlk->flags |= NETLINK_F_STRICT_CHK; 1756 else 1757 nlk->flags &= ~NETLINK_F_STRICT_CHK; 1758 err = 0; 1759 break; 1760 default: 1761 err = -ENOPROTOOPT; 1762 } 1763 return err; 1764 } 1765 1766 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1767 char __user *optval, int __user *optlen) 1768 { 1769 struct sock *sk = sock->sk; 1770 struct netlink_sock *nlk = nlk_sk(sk); 1771 unsigned int 
flag; 1772 int len, val; 1773 1774 if (level != SOL_NETLINK) 1775 return -ENOPROTOOPT; 1776 1777 if (get_user(len, optlen)) 1778 return -EFAULT; 1779 if (len < 0) 1780 return -EINVAL; 1781 1782 switch (optname) { 1783 case NETLINK_PKTINFO: 1784 flag = NETLINK_F_RECV_PKTINFO; 1785 break; 1786 case NETLINK_BROADCAST_ERROR: 1787 flag = NETLINK_F_BROADCAST_SEND_ERROR; 1788 break; 1789 case NETLINK_NO_ENOBUFS: 1790 flag = NETLINK_F_RECV_NO_ENOBUFS; 1791 break; 1792 case NETLINK_LIST_MEMBERSHIPS: { 1793 int pos, idx, shift, err = 0; 1794 1795 netlink_lock_table(); 1796 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 1797 if (len - pos < sizeof(u32)) 1798 break; 1799 1800 idx = pos / sizeof(unsigned long); 1801 shift = (pos % sizeof(unsigned long)) * 8; 1802 if (put_user((u32)(nlk->groups[idx] >> shift), 1803 (u32 __user *)(optval + pos))) { 1804 err = -EFAULT; 1805 break; 1806 } 1807 } 1808 if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen)) 1809 err = -EFAULT; 1810 netlink_unlock_table(); 1811 return err; 1812 } 1813 case NETLINK_CAP_ACK: 1814 flag = NETLINK_F_CAP_ACK; 1815 break; 1816 case NETLINK_EXT_ACK: 1817 flag = NETLINK_F_EXT_ACK; 1818 break; 1819 case NETLINK_GET_STRICT_CHK: 1820 flag = NETLINK_F_STRICT_CHK; 1821 break; 1822 default: 1823 return -ENOPROTOOPT; 1824 } 1825 1826 if (len < sizeof(int)) 1827 return -EINVAL; 1828 1829 len = sizeof(int); 1830 val = nlk->flags & flag ? 
1 : 0; 1831 1832 if (put_user(len, optlen) || 1833 copy_to_user(optval, &val, len)) 1834 return -EFAULT; 1835 1836 return 0; 1837 } 1838 1839 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1840 { 1841 struct nl_pktinfo info; 1842 1843 info.group = NETLINK_CB(skb).dst_group; 1844 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1845 } 1846 1847 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, 1848 struct sk_buff *skb) 1849 { 1850 if (!NETLINK_CB(skb).nsid_is_set) 1851 return; 1852 1853 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), 1854 &NETLINK_CB(skb).nsid); 1855 } 1856 1857 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1858 { 1859 struct sock *sk = sock->sk; 1860 struct netlink_sock *nlk = nlk_sk(sk); 1861 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1862 u32 dst_portid; 1863 u32 dst_group; 1864 struct sk_buff *skb; 1865 int err; 1866 struct scm_cookie scm; 1867 u32 netlink_skb_flags = 0; 1868 1869 if (msg->msg_flags & MSG_OOB) 1870 return -EOPNOTSUPP; 1871 1872 if (len == 0) { 1873 pr_warn_once("Zero length message leads to an empty skb\n"); 1874 return -ENODATA; 1875 } 1876 1877 err = scm_send(sock, msg, &scm, true); 1878 if (err < 0) 1879 return err; 1880 1881 if (msg->msg_namelen) { 1882 err = -EINVAL; 1883 if (msg->msg_namelen < sizeof(struct sockaddr_nl)) 1884 goto out; 1885 if (addr->nl_family != AF_NETLINK) 1886 goto out; 1887 dst_portid = addr->nl_pid; 1888 dst_group = ffs(addr->nl_groups); 1889 err = -EPERM; 1890 if ((dst_group || dst_portid) && 1891 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1892 goto out; 1893 netlink_skb_flags |= NETLINK_SKB_DST; 1894 } else { 1895 /* Paired with WRITE_ONCE() in netlink_connect() */ 1896 dst_portid = READ_ONCE(nlk->dst_portid); 1897 dst_group = READ_ONCE(nlk->dst_group); 1898 } 1899 1900 /* Paired with WRITE_ONCE() in netlink_insert() */ 1901 if (!READ_ONCE(nlk->bound)) 
{ 1902 err = netlink_autobind(sock); 1903 if (err) 1904 goto out; 1905 } else { 1906 /* Ensure nlk is hashed and visible. */ 1907 smp_rmb(); 1908 } 1909 1910 err = -EMSGSIZE; 1911 if (len > sk->sk_sndbuf - 32) 1912 goto out; 1913 err = -ENOBUFS; 1914 skb = netlink_alloc_large_skb(len, dst_group); 1915 if (skb == NULL) 1916 goto out; 1917 1918 NETLINK_CB(skb).portid = nlk->portid; 1919 NETLINK_CB(skb).dst_group = dst_group; 1920 NETLINK_CB(skb).creds = scm.creds; 1921 NETLINK_CB(skb).flags = netlink_skb_flags; 1922 1923 err = -EFAULT; 1924 if (memcpy_from_msg(skb_put(skb, len), msg, len)) { 1925 kfree_skb(skb); 1926 goto out; 1927 } 1928 1929 err = security_netlink_send(sk, skb); 1930 if (err) { 1931 kfree_skb(skb); 1932 goto out; 1933 } 1934 1935 if (dst_group) { 1936 refcount_inc(&skb->users); 1937 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 1938 } 1939 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT); 1940 1941 out: 1942 scm_destroy(&scm); 1943 return err; 1944 } 1945 1946 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1947 int flags) 1948 { 1949 struct scm_cookie scm; 1950 struct sock *sk = sock->sk; 1951 struct netlink_sock *nlk = nlk_sk(sk); 1952 size_t copied, max_recvmsg_len; 1953 struct sk_buff *skb, *data_skb; 1954 int err, ret; 1955 1956 if (flags & MSG_OOB) 1957 return -EOPNOTSUPP; 1958 1959 copied = 0; 1960 1961 skb = skb_recv_datagram(sk, flags, &err); 1962 if (skb == NULL) 1963 goto out; 1964 1965 data_skb = skb; 1966 1967 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1968 if (unlikely(skb_shinfo(skb)->frag_list)) { 1969 /* 1970 * If this skb has a frag_list, then here that means that we 1971 * will have to use the frag_list skb's data for compat tasks 1972 * and the regular skb's data for normal (non-compat) tasks. 1973 * 1974 * If we need to send the compat skb, assign it to the 1975 * 'data_skb' variable so that it will be used below for data 1976 * copying. 
We keep 'skb' for everything else, including 1977 * freeing both later. 1978 */ 1979 if (flags & MSG_CMSG_COMPAT) 1980 data_skb = skb_shinfo(skb)->frag_list; 1981 } 1982 #endif 1983 1984 /* Record the max length of recvmsg() calls for future allocations */ 1985 max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); 1986 max_recvmsg_len = min_t(size_t, max_recvmsg_len, 1987 SKB_WITH_OVERHEAD(32768)); 1988 WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); 1989 1990 copied = data_skb->len; 1991 if (len < copied) { 1992 msg->msg_flags |= MSG_TRUNC; 1993 copied = len; 1994 } 1995 1996 err = skb_copy_datagram_msg(data_skb, 0, msg, copied); 1997 1998 if (msg->msg_name) { 1999 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 2000 addr->nl_family = AF_NETLINK; 2001 addr->nl_pad = 0; 2002 addr->nl_pid = NETLINK_CB(skb).portid; 2003 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 2004 msg->msg_namelen = sizeof(*addr); 2005 } 2006 2007 if (nlk->flags & NETLINK_F_RECV_PKTINFO) 2008 netlink_cmsg_recv_pktinfo(msg, skb); 2009 if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) 2010 netlink_cmsg_listen_all_nsid(sk, msg, skb); 2011 2012 memset(&scm, 0, sizeof(scm)); 2013 scm.creds = *NETLINK_CREDS(skb); 2014 if (flags & MSG_TRUNC) 2015 copied = data_skb->len; 2016 2017 skb_free_datagram(sk, skb); 2018 2019 if (READ_ONCE(nlk->cb_running) && 2020 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 2021 ret = netlink_dump(sk); 2022 if (ret) { 2023 sk->sk_err = -ret; 2024 sk_error_report(sk); 2025 } 2026 } 2027 2028 scm_recv(sock, msg, &scm, flags); 2029 out: 2030 netlink_rcv_wake(sk); 2031 return err ? : copied; 2032 } 2033 2034 static void netlink_data_ready(struct sock *sk) 2035 { 2036 BUG(); 2037 } 2038 2039 /* 2040 * We export these functions to other modules. They provide a 2041 * complete set of kernel non-blocking support for message 2042 * queueing. 
2043 */ 2044 2045 struct sock * 2046 __netlink_kernel_create(struct net *net, int unit, struct module *module, 2047 struct netlink_kernel_cfg *cfg) 2048 { 2049 struct socket *sock; 2050 struct sock *sk; 2051 struct netlink_sock *nlk; 2052 struct listeners *listeners = NULL; 2053 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; 2054 unsigned int groups; 2055 2056 BUG_ON(!nl_table); 2057 2058 if (unit < 0 || unit >= MAX_LINKS) 2059 return NULL; 2060 2061 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 2062 return NULL; 2063 2064 if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) 2065 goto out_sock_release_nosk; 2066 2067 sk = sock->sk; 2068 2069 if (!cfg || cfg->groups < 32) 2070 groups = 32; 2071 else 2072 groups = cfg->groups; 2073 2074 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2075 if (!listeners) 2076 goto out_sock_release; 2077 2078 sk->sk_data_ready = netlink_data_ready; 2079 if (cfg && cfg->input) 2080 nlk_sk(sk)->netlink_rcv = cfg->input; 2081 2082 if (netlink_insert(sk, 0)) 2083 goto out_sock_release; 2084 2085 nlk = nlk_sk(sk); 2086 nlk->flags |= NETLINK_F_KERNEL_SOCKET; 2087 2088 netlink_table_grab(); 2089 if (!nl_table[unit].registered) { 2090 nl_table[unit].groups = groups; 2091 rcu_assign_pointer(nl_table[unit].listeners, listeners); 2092 nl_table[unit].cb_mutex = cb_mutex; 2093 nl_table[unit].module = module; 2094 if (cfg) { 2095 nl_table[unit].bind = cfg->bind; 2096 nl_table[unit].unbind = cfg->unbind; 2097 nl_table[unit].release = cfg->release; 2098 nl_table[unit].flags = cfg->flags; 2099 } 2100 nl_table[unit].registered = 1; 2101 } else { 2102 kfree(listeners); 2103 nl_table[unit].registered++; 2104 } 2105 netlink_table_ungrab(); 2106 return sk; 2107 2108 out_sock_release: 2109 kfree(listeners); 2110 netlink_kernel_release(sk); 2111 return NULL; 2112 2113 out_sock_release_nosk: 2114 sock_release(sock); 2115 return NULL; 2116 } 2117 EXPORT_SYMBOL(__netlink_kernel_create); 2118 2119 void 2120 
netlink_kernel_release(struct sock *sk) 2121 { 2122 if (sk == NULL || sk->sk_socket == NULL) 2123 return; 2124 2125 sock_release(sk->sk_socket); 2126 } 2127 EXPORT_SYMBOL(netlink_kernel_release); 2128 2129 int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 2130 { 2131 struct listeners *new, *old; 2132 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 2133 2134 if (groups < 32) 2135 groups = 32; 2136 2137 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 2138 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); 2139 if (!new) 2140 return -ENOMEM; 2141 old = nl_deref_protected(tbl->listeners); 2142 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 2143 rcu_assign_pointer(tbl->listeners, new); 2144 2145 kfree_rcu(old, rcu); 2146 } 2147 tbl->groups = groups; 2148 2149 return 0; 2150 } 2151 2152 /** 2153 * netlink_change_ngroups - change number of multicast groups 2154 * 2155 * This changes the number of multicast groups that are available 2156 * on a certain netlink family. Note that it is not possible to 2157 * change the number of groups to below 32. Also note that it does 2158 * not implicitly call netlink_clear_multicast_users() when the 2159 * number of groups is reduced. 2160 * 2161 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 2162 * @groups: The new number of groups. 
2163 */ 2164 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 2165 { 2166 int err; 2167 2168 netlink_table_grab(); 2169 err = __netlink_change_ngroups(sk, groups); 2170 netlink_table_ungrab(); 2171 2172 return err; 2173 } 2174 2175 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 2176 { 2177 struct sock *sk; 2178 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 2179 2180 sk_for_each_bound(sk, &tbl->mc_list) 2181 netlink_update_socket_mc(nlk_sk(sk), group, 0); 2182 } 2183 2184 struct nlmsghdr * 2185 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) 2186 { 2187 struct nlmsghdr *nlh; 2188 int size = nlmsg_msg_size(len); 2189 2190 nlh = skb_put(skb, NLMSG_ALIGN(size)); 2191 nlh->nlmsg_type = type; 2192 nlh->nlmsg_len = size; 2193 nlh->nlmsg_flags = flags; 2194 nlh->nlmsg_pid = portid; 2195 nlh->nlmsg_seq = seq; 2196 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) 2197 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); 2198 return nlh; 2199 } 2200 EXPORT_SYMBOL(__nlmsg_put); 2201 2202 /* 2203 * It looks a bit ugly. 2204 * It would be better to create kernel thread. 
2205 */ 2206 2207 static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, 2208 struct netlink_callback *cb, 2209 struct netlink_ext_ack *extack) 2210 { 2211 struct nlmsghdr *nlh; 2212 2213 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno), 2214 NLM_F_MULTI | cb->answer_flags); 2215 if (WARN_ON(!nlh)) 2216 return -ENOBUFS; 2217 2218 nl_dump_check_consistent(cb, nlh); 2219 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno)); 2220 2221 if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) { 2222 nlh->nlmsg_flags |= NLM_F_ACK_TLVS; 2223 if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)) 2224 nlmsg_end(skb, nlh); 2225 } 2226 2227 return 0; 2228 } 2229 2230 static int netlink_dump(struct sock *sk) 2231 { 2232 struct netlink_sock *nlk = nlk_sk(sk); 2233 struct netlink_ext_ack extack = {}; 2234 struct netlink_callback *cb; 2235 struct sk_buff *skb = NULL; 2236 size_t max_recvmsg_len; 2237 struct module *module; 2238 int err = -ENOBUFS; 2239 int alloc_min_size; 2240 int alloc_size; 2241 2242 mutex_lock(nlk->cb_mutex); 2243 if (!nlk->cb_running) { 2244 err = -EINVAL; 2245 goto errout_skb; 2246 } 2247 2248 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 2249 goto errout_skb; 2250 2251 /* NLMSG_GOODSIZE is small to avoid high order allocations being 2252 * required, but it makes sense to _attempt_ a 16K bytes allocation 2253 * to reduce number of system calls on dump operations, if user 2254 * ever provided a big enough buffer. 
2255 */ 2256 cb = &nlk->cb; 2257 alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); 2258 2259 max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); 2260 if (alloc_min_size < max_recvmsg_len) { 2261 alloc_size = max_recvmsg_len; 2262 skb = alloc_skb(alloc_size, 2263 (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | 2264 __GFP_NOWARN | __GFP_NORETRY); 2265 } 2266 if (!skb) { 2267 alloc_size = alloc_min_size; 2268 skb = alloc_skb(alloc_size, GFP_KERNEL); 2269 } 2270 if (!skb) 2271 goto errout_skb; 2272 2273 /* Trim skb to allocated size. User is expected to provide buffer as 2274 * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at 2275 * netlink_recvmsg())). dump will pack as many smaller messages as 2276 * could fit within the allocated skb. skb is typically allocated 2277 * with larger space than required (could be as much as near 2x the 2278 * requested size with align to next power of 2 approach). Allowing 2279 * dump to use the excess space makes it difficult for a user to have a 2280 * reasonable static buffer based on the expected largest dump of a 2281 * single netdev. The outcome is MSG_TRUNC error. 
*/
	skb_reserve(skb, skb_tailroom(skb) - alloc_size);

	/* Make sure malicious BPF programs can not read uninitialized memory
	 * from skb->head -> skb->data
	 */
	skb_reset_network_header(skb);
	skb_reset_mac_header(skb);

	netlink_skb_set_owner_r(skb, sk);

	if (nlk->dump_done_errno > 0) {
		cb->extack = &extack;
		nlk->dump_done_errno = cb->dump(skb, cb);
		cb->extack = NULL;
	}

	/* Either the dump callback still has more to emit, or there is no
	 * room left in this skb for the trailing "done" message: deliver what
	 * we have and leave the dump running.
	 */
	if (nlk->dump_done_errno > 0 ||
	    skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
		mutex_unlock(nlk->cb_mutex);

		if (sk_filter(sk, skb))
			kfree_skb(skb);
		else
			__netlink_sendskb(sk, skb);
		return 0;
	}

	if (netlink_dump_done(nlk, skb, cb, &extack))
		goto errout_skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
	/* frag_list skb's data is used for compat tasks
	 * and the regular skb's data for normal (non-compat) tasks.
	 * See netlink_recvmsg().
	 */
	if (unlikely(skb_shinfo(skb)->frag_list)) {
		if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack))
			goto errout_skb;
	}
#endif

	if (sk_filter(sk, skb))
		kfree_skb(skb);
	else
		__netlink_sendskb(sk, skb);

	if (cb->done)
		cb->done(cb);

	/* Snapshot what must be released before dropping the mutex. */
	WRITE_ONCE(nlk->cb_running, false);
	module = cb->module;
	skb = cb->skb;
	mutex_unlock(nlk->cb_mutex);
	module_put(module);
	consume_skb(skb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
	return err;
}

/**
 * __netlink_dump_start - arm a dump on the requester's socket and run it once
 * @ssk: kernel-side socket; supplies the netns and protocol for the lookup
 * @skb: request skb; an extra reference is taken and stored in the callback
 * @nlh: netlink header of the request, stored in the callback
 * @control: dump/start/done callbacks, owning module, data and sizing hints
 *
 * The destination socket is looked up by the portid recorded in @skb. Only one
 * dump may run per socket at a time; the callback state is (re)initialised
 * under nlk->cb_mutex.
 *
 * Return: -ECONNREFUSED if the socket is not found, -EBUSY if a dump is
 * already running, -EPROTONOSUPPORT if the module reference cannot be taken,
 * the error from @control->start or from netlink_dump(), and otherwise
 * -EINTR to tell the caller not to send an ACK.
 */
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
			 const struct nlmsghdr *nlh,
			 struct netlink_dump_control *control)
{
	struct netlink_sock *nlk, *nlk2;
	struct netlink_callback *cb;
	struct sock *sk;
	int ret;

	/* Reference kept in cb->skb; dropped on the error paths below. */
	refcount_inc(&skb->users);

	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
	if (sk == NULL) {
		ret = -ECONNREFUSED;
		goto error_free;
	}

	nlk = nlk_sk(sk);
	mutex_lock(nlk->cb_mutex);
	/* A dump is in progress... */
	if (nlk->cb_running) {
		ret = -EBUSY;
		goto error_unlock;
	}
	/* add reference of module which cb->dump belongs to */
	if (!try_module_get(control->module)) {
		ret = -EPROTONOSUPPORT;
		goto error_unlock;
	}

	cb = &nlk->cb;
	memset(cb, 0, sizeof(*cb));
	cb->dump = control->dump;
	cb->done = control->done;
	cb->nlh = nlh;
	cb->data = control->data;
	cb->module = control->module;
	cb->min_dump_alloc = control->min_dump_alloc;
	cb->skb = skb;

	/* Strict checking is a property of the socket that sent the request. */
	nlk2 = nlk_sk(NETLINK_CB(skb).sk);
	cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);

	if (control->start) {
		cb->extack = control->extack;
		ret = control->start(cb);
		cb->extack = NULL;
		if (ret)
			goto error_put;
	}

	WRITE_ONCE(nlk->cb_running, true);
	nlk->dump_done_errno = INT_MAX;

	mutex_unlock(nlk->cb_mutex);

	ret = netlink_dump(sk);

	sock_put(sk);

	if (ret)
		return ret;

	/* We successfully started a dump, by returning -EINTR we
	 * signal not to send ACK even if it was requested.
	 */
	return -EINTR;

error_put:
	module_put(control->module);
error_unlock:
	/* nlk is derived from sk, so release the mutex while the socket
	 * reference is still held and only then drop the reference.
	 */
	mutex_unlock(nlk->cb_mutex);
	sock_put(sk);
error_free:
	kfree_skb(skb);
	return ret;
}
EXPORT_SYMBOL(__netlink_dump_start);

/*
 * Compute how many bytes of extended-ACK attributes will be appended to an
 * ACK/error message. Returns 0 when there is no extack or the socket did not
 * enable NETLINK_F_EXT_ACK. Must stay in sync with netlink_ack_tlv_fill().
 */
static size_t
netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
		    const struct netlink_ext_ack *extack)
{
	size_t tlvlen;

	if (!extack || !(nlk->flags & NETLINK_F_EXT_ACK))
		return 0;

	tlvlen = 0;
	if (extack->_msg)
		tlvlen += nla_total_size(strlen(extack->_msg) + 1);
	if (extack->cookie_len)
		tlvlen += nla_total_size(extack->cookie_len);

	/* Following attributes are only reported as error (not warning) */
	if (!err)
		return tlvlen;

	if (extack->bad_attr)
		tlvlen += nla_total_size(sizeof(u32));
	if (extack->policy)
		tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
	if (extack->miss_type)
		tlvlen += nla_total_size(sizeof(u32));
	if (extack->miss_nest)
		tlvlen += nla_total_size(sizeof(u32));

	return tlvlen;
}

/*
 * Emit the extended-ACK attributes into @skb. The message string and cookie
 * are written for both success and error; the remaining attributes only when
 * @err is non-zero. Attribute pointers are reported as offsets from @nlh and
 * are range-checked against @in_skb before use.
 */
static void
netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
		     struct nlmsghdr *nlh, int err,
		     const struct netlink_ext_ack *extack)
{
	if (extack->_msg)
		WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
	if (extack->cookie_len)
		WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
				extack->cookie_len, extack->cookie));

	if (!err)
		return;

	if (extack->bad_attr &&
	    !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
		     (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
		WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
				    (u8 *)extack->bad_attr - (u8 *)nlh));
	if (extack->policy)
		netlink_policy_dump_write_attr(skb, extack->policy,
					       NLMSGERR_ATTR_POLICY);
	if (extack->miss_type)
		WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
				    extack->miss_type));
	if (extack->miss_nest &&
	    !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
		     (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
		WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
				    (u8 *)extack->miss_nest - (u8 *)nlh));
}

/**
 * netlink_ack - send an NLMSG_ERROR reply (ACK or error) for a request
 * @in_skb: skb that carried the request
 * @nlh: header of the request being acknowledged
 * @err: 0 for a plain ACK, otherwise the error code to report
 * @extack: optional extended ACK data, may be NULL
 *
 * If the reply cannot be built, ENOBUFS is recorded in sk_err of the
 * requesting socket and the error is reported on it.
 */
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
		 const struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	size_t payload = sizeof(*errmsg);
	struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
	unsigned int flags = 0;
	size_t tlvlen;

	/* Error messages get the original request appended, unless the user
	 * requests to cap the error message, and get extra error data if
	 * requested.
	 */
	if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
		payload += nlmsg_len(nlh);
	else
		flags |= NLM_F_CAPPED;

	tlvlen = netlink_ack_tlv_len(nlk, err, extack);
	if (tlvlen)
		flags |= NLM_F_ACK_TLVS;

	skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
	if (!skb)
		goto err_skb;

	rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			NLMSG_ERROR, sizeof(*errmsg), flags);
	if (!rep)
		goto err_bad_put;
	errmsg = nlmsg_data(rep);
	errmsg->error = err;
	errmsg->msg = *nlh;

	if (!(flags & NLM_F_CAPPED)) {
		if (!nlmsg_append(skb, nlmsg_len(nlh)))
			goto err_bad_put;

		memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh),
		       nlmsg_len(nlh));
	}

	if (tlvlen)
		netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);

	nlmsg_end(skb, rep);

	nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid);

	return;

err_bad_put:
	nlmsg_free(skb);
err_skb:
	/* Annotated store: this runs without the requester's socket lock. */
	WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS);
	sk_error_report(NETLINK_CB(in_skb).sk);
}
EXPORT_SYMBOL(netlink_ack);

/**
 * netlink_rcv_skb - walk all netlink messages in an skb and dispatch them
 * @skb: skb holding one or more netlink messages; consumed via skb_pull()
 * @cb: handler invoked for each request message
 *
 * Messages without NLM_F_REQUEST and control messages (type below
 * NLMSG_MIN_TYPE) are not passed to @cb. An ACK is sent when NLM_F_ACK is set
 * or when @cb returned an error, except for -EINTR which suppresses the ACK.
 * Processing stops silently at the first malformed header.
 *
 * Return: always 0.
 */
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *,
						   struct netlink_ext_ack *))
{
	struct netlink_ext_ack extack;
	struct nlmsghdr *nlh;
	int err;

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		memset(&extack, 0, sizeof(extack));
		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
			goto ack;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto ack;

		err = cb(skb, nlh, &extack);
		if (err == -EINTR)
			goto skip;

ack:
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err, &extack);

skip:
		/* Advance by the aligned length, clamped to what is left. */
		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @portid: destination netlink portid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 *
 * Return: the multicast error if any (-ESRCH is treated as success),
 * otherwise the result of the unicast report when @report is set, else 0.
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_portid = 0;

		if (report) {
			/* skb is used again for the unicast below */
			refcount_inc(&skb->users);
			exclude_portid = portid;
		}

		/* errors reported via destination sk->sk_err, but propagate
		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
		err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
		if (err == -ESRCH)
			err = 0;
	}

	if (report) {
		int err2;

		err2 = nlmsg_unicast(sk, skb, portid);
		if (!err)
			err = err2;
	}

	return err;
}
EXPORT_SYMBOL(nlmsg_notify);

#ifdef CONFIG_PROC_FS
/* Per-open iterator state for /proc/net/netlink and the BPF iterator. */
struct nl_seq_iter {
	struct seq_net_private p;
	struct rhashtable_iter hti;
	int link;		/* index into nl_table currently being walked */
};

/* Begin walking the hash table of protocol iter->link. */
static void netlink_walk_start(struct nl_seq_iter *iter)
{
	rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti);
	rhashtable_walk_start(&iter->hti);
}

/* End the walk started by netlink_walk_start(). */
static void netlink_walk_stop(struct nl_seq_iter *iter)
{
	rhashtable_walk_stop(&iter->hti);
	rhashtable_walk_exit(&iter->hti);
}

/*
 * Return the next socket that belongs to the seq_file's netns, moving on to
 * the next protocol table when the current one is exhausted. Returns NULL
 * after the last table (the walk is already stopped in that case) or an
 * ERR_PTR from the rhashtable walker other than -EAGAIN.
 */
static void *__netlink_seq_next(struct seq_file *seq)
{
	struct nl_seq_iter *iter = seq->private;
	struct netlink_sock *nlk;

	do {
		for (;;) {
			nlk = rhashtable_walk_next(&iter->hti);

			if (IS_ERR(nlk)) {
				/* -EAGAIN: just ask the walker again */
				if (PTR_ERR(nlk) == -EAGAIN)
					continue;

				return nlk;
			}

			if (nlk)
				break;

			netlink_walk_stop(iter);
			if (++iter->link >= MAX_LINKS)
				return NULL;

			netlink_walk_start(iter);
		}
	} while (sock_net(&nlk->sk) != seq_file_net(seq));

	return nlk;
}

/*
 * seq_file ->start: restart from the first table and skip forward *posp
 * entries. Position 0 yields SEQ_START_TOKEN (the header line).
 */
static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
	__acquires(RCU)
{
	struct nl_seq_iter *iter = seq->private;
	void *obj = SEQ_START_TOKEN;
	loff_t pos;

	iter->link = 0;

	netlink_walk_start(iter);

	for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
		obj = __netlink_seq_next(seq);

	return obj;
}

/* seq_file ->next: advance the position and fetch the next socket. */
static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return __netlink_seq_next(seq);
}

/*
 * Stop the walk unless __netlink_seq_next() already did so when it ran off
 * the end of the last table (iter->link >= MAX_LINKS).
 */
static void netlink_native_seq_stop(struct seq_file *seq, void *v)
{
	struct nl_seq_iter *iter = seq->private;

	if (iter->link >= MAX_LINKS)
		return;

	netlink_walk_stop(iter);
}


/* Print the header line or one socket row of /proc/net/netlink. */
static int netlink_native_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		/* NOTE(review): the column padding of this header looks
		 * collapsed relative to the field widths used below; confirm
		 * against the intended layout.
		 */
		seq_puts(seq,
			 "sk Eth Pid Groups "
			 "Rmem Wmem Dump Locks Drops Inode\n");
	} else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n",
			   s,
			   s->sk_protocol,
			   nlk->portid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   sk_rmem_alloc_get(s),
			   sk_wmem_alloc_get(s),
			   READ_ONCE(nlk->cb_running),
			   refcount_read(&s->sk_refcnt),
			   atomic_read(&s->sk_drops),
			   sock_i_ino(s)
			);

	}
	return 0;
}

#ifdef CONFIG_BPF_SYSCALL
/* Context handed to BPF programs attached to the "netlink" iterator. */
struct bpf_iter__netlink {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct netlink_sock *, sk);
};

DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk)

/* Run the attached BPF program for one element (@v may be NULL at stop). */
static int netlink_prog_seq_show(struct bpf_prog *prog,
				 struct bpf_iter_meta *meta,
				 void *v)
{
	struct bpf_iter__netlink ctx;

	meta->seq_num--;  /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.sk = nlk_sk((struct sock *)v);
	return bpf_iter_run_prog(prog, &ctx);
}

/*
 * seq_file ->show: use the native procfs output when no BPF program is
 * attached; otherwise run the program for every real element and skip the
 * header token.
 */
static int netlink_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	if (!prog)
		return netlink_native_seq_show(seq, v);

	if (v != SEQ_START_TOKEN)
		return netlink_prog_seq_show(prog, &meta, v);

	return 0;
}

/*
 * seq_file ->stop: when called with a NULL element, give an attached BPF
 * program one final invocation, then stop the native walk.
 */
static void netlink_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)netlink_prog_seq_show(prog, &meta, v);
	}

	netlink_native_seq_stop(seq, v);
}
#else
/* Without BPF support the seq ops map straight onto the native helpers. */
static int netlink_seq_show(struct seq_file *seq, void *v)
{
	return netlink_native_seq_show(seq, v);
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
{
	netlink_native_seq_stop(seq, v);
}
#endif

static const struct seq_operations netlink_seq_ops = {
	.start  = netlink_seq_start,
	.next   = netlink_seq_next,
	.stop   = netlink_seq_stop,
	.show   = netlink_seq_show,
};
#endif

/* Register @nb on the netlink notifier chain. */
int netlink_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

/* Remove @nb from the netlink notifier chain. */
int netlink_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

/* Socket-layer operations for PF_NETLINK sockets. */
static const struct proto_ops netlink_ops = {
	.family =	PF_NETLINK,
	.owner =	THIS_MODULE,
	.release =	netlink_release,
	.bind =		netlink_bind,
	.connect =	netlink_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	netlink_getname,
	.poll =		datagram_poll,
	.ioctl =	netlink_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	netlink_setsockopt,
	.getsockopt =	netlink_getsockopt,
	.sendmsg =	netlink_sendmsg,
	.recvmsg =	netlink_recvmsg,
	.mmap =		sock_no_mmap,
};

static const struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};

/* Per-netns init: create the "netlink" proc entry when procfs is enabled. */
static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops,
			sizeof(struct nl_seq_iter)))
		return -ENOMEM;
#endif
	return 0;
}

/* Per-netns exit: remove the "netlink" proc entry. */
static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("netlink", net->proc_net);
#endif
}

/*
 * Pre-register the NETLINK_USERSOCK protocol slot with 32 groups and
 * non-root send permission. Panics if the listeners bitmap cannot be
 * allocated.
 */
static void __init netlink_add_usersock_entry(void)
{
	struct listeners *listeners;
	int groups = 32;

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		panic("netlink_add_usersock_entry: Cannot allocate listeners\n");

	netlink_table_grab();

	nl_table[NETLINK_USERSOCK].groups = groups;
	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
	nl_table[NETLINK_USERSOCK].registered = 1;
	nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;

	netlink_table_ungrab();
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};

/*
 * rhashtable object hash: hash the (netns, portid) compare key built from
 * the socket, so it matches lookups done with netlink_compare_arg.
 */
static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
{
	const struct netlink_sock *nlk = data;
	struct netlink_compare_arg arg;

	netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
	return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
}

static const struct rhashtable_params netlink_rhashtable_params = {
	.head_offset = offsetof(struct netlink_sock, node),
	.key_len = netlink_compare_arg_len,
	.obj_hashfn = netlink_hash,
	.obj_cmpfn = netlink_compare,
	.automatic_shrinking = true,
};

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
BTF_ID_LIST(btf_netlink_sock_id)
BTF_ID(struct, netlink_sock)

static const struct bpf_iter_seq_info netlink_seq_info = {
	.seq_ops		= &netlink_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct nl_seq_iter),
};

static struct bpf_iter_reg netlink_reg_info = {
	.target			= "netlink",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__netlink, sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &netlink_seq_info,
};

/* Fill in the BTF id of struct netlink_sock and register the iterator. */
static int __init bpf_iter_register(void)
{
	netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
	return bpf_iter_reg_target(&netlink_reg_info);
}
#endif

/*
 * Boot-time initialisation of the netlink core: register the proto, the
 * optional BPF iterator, allocate nl_table with one hash table per protocol,
 * then register the socket family, the pernet ops and rtnetlink.
 *
 * Returns 0 on success or the error from proto_register() /
 * bpf_iter_register(); failure to set up nl_table panics.
 */
static int __init netlink_proto_init(void)
{
	int i;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	err = bpf_iter_register();
	if (err)
		goto out;
#endif

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	for (i = 0; i < MAX_LINKS; i++) {
		if (rhashtable_init(&nl_table[i].hash,
				    &netlink_rhashtable_params) < 0) {
			/* Unwind every table initialised so far,
			 * including index 0.
			 */
			while (--i >= 0)
				rhashtable_destroy(&nl_table[i].hash);
			kfree(nl_table);
			goto panic;
		}
	}

	netlink_add_usersock_entry();

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	register_pernet_subsys(&netlink_tap_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);