1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NET4: Implementation of BSD Unix domain sockets. 4 * 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> 6 * 7 * Fixes: 8 * Linus Torvalds : Assorted bug cures. 9 * Niibe Yutaka : async I/O support. 10 * Carsten Paeth : PF_UNIX check, address fixes. 11 * Alan Cox : Limit size of allocated blocks. 12 * Alan Cox : Fixed the stupid socketpair bug. 13 * Alan Cox : BSD compatibility fine tuning. 14 * Alan Cox : Fixed a bug in connect when interrupted. 15 * Alan Cox : Sorted out a proper draft version of 16 * file descriptor passing hacked up from 17 * Mike Shaver's work. 18 * Marty Leisner : Fixes to fd passing 19 * Nick Nevin : recvmsg bugfix. 20 * Alan Cox : Started proper garbage collector 21 * Heiko EiBfeldt : Missing verify_area check 22 * Alan Cox : Started POSIXisms 23 * Andreas Schwab : Replace inode by dentry for proper 24 * reference counting 25 * Kirk Petersen : Made this a module 26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 27 * Lots of bug fixes. 28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 29 * by above two patches. 30 * Andrea Arcangeli : If possible we block in connect(2) 31 * if the max backlog of the listen socket 32 * is been reached. This won't break 33 * old apps and it will avoid huge amount 34 * of socks hashed (this for unix_gc() 35 * performances reasons). 36 * Security fix that limits the max 37 * number of socks to 2*max_files and 38 * the number of skb queueable in the 39 * dgram receiver. 40 * Artur Skawina : Hash function optimizations 41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) 42 * Malcolm Beattie : Set peercred for socketpair 43 * Michal Ostrowski : Module initialization cleanup. 44 * Arnaldo C. 
Melo : Remove MOD_{INC,DEC}_USE_COUNT, 45 * the core infrastructure is doing that 46 * for all net proto families now (2.5.69+) 47 * 48 * Known differences from reference BSD that was tested: 49 * 50 * [TO FIX] 51 * ECONNREFUSED is not returned from one end of a connected() socket to the 52 * other the moment one end closes. 53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 54 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 55 * [NOT TO FIX] 56 * accept() returns a path name even if the connecting socket has closed 57 * in the meantime (BSD loses the path and gives up). 58 * accept() returns 0 length path for an unbound connector. BSD returns 16 59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 60 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 61 * BSD af_unix apparently has connect forgetting to block properly. 62 * (need to check this with the POSIX spec in detail) 63 * 64 * Differences from 2.0.0-11-... (ANK) 65 * Bug fixes and improvements. 66 * - client shutdown killed server socket. 67 * - removed all useless cli/sti pairs. 68 * 69 * Semantic changes/extensions. 70 * - generic control message passing. 71 * - SCM_CREDENTIALS control message. 72 * - "Abstract" (not FS based) socket bindings. 73 * Abstract names are sequences of bytes (not zero terminated) 74 * started by 0, so that this name space does not intersect 75 * with BSD names. 
76 */ 77 78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 79 80 #include <linux/module.h> 81 #include <linux/kernel.h> 82 #include <linux/signal.h> 83 #include <linux/sched/signal.h> 84 #include <linux/errno.h> 85 #include <linux/string.h> 86 #include <linux/stat.h> 87 #include <linux/dcache.h> 88 #include <linux/namei.h> 89 #include <linux/socket.h> 90 #include <linux/un.h> 91 #include <linux/fcntl.h> 92 #include <linux/filter.h> 93 #include <linux/termios.h> 94 #include <linux/sockios.h> 95 #include <linux/net.h> 96 #include <linux/in.h> 97 #include <linux/fs.h> 98 #include <linux/slab.h> 99 #include <linux/uaccess.h> 100 #include <linux/skbuff.h> 101 #include <linux/netdevice.h> 102 #include <net/net_namespace.h> 103 #include <net/sock.h> 104 #include <net/tcp_states.h> 105 #include <net/af_unix.h> 106 #include <linux/proc_fs.h> 107 #include <linux/seq_file.h> 108 #include <net/scm.h> 109 #include <linux/init.h> 110 #include <linux/poll.h> 111 #include <linux/rtnetlink.h> 112 #include <linux/mount.h> 113 #include <net/checksum.h> 114 #include <linux/security.h> 115 #include <linux/splice.h> 116 #include <linux/freezer.h> 117 #include <linux/file.h> 118 #include <linux/btf_ids.h> 119 120 #include "scm.h" 121 122 static atomic_long_t unix_nr_socks; 123 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; 124 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; 125 126 /* SMP locking strategy: 127 * hash table is protected with spinlock. 128 * each socket state is protected by separate spinlock. 
129 */ 130 131 static unsigned int unix_unbound_hash(struct sock *sk) 132 { 133 unsigned long hash = (unsigned long)sk; 134 135 hash ^= hash >> 16; 136 hash ^= hash >> 8; 137 hash ^= sk->sk_type; 138 139 return hash & UNIX_HASH_MOD; 140 } 141 142 static unsigned int unix_bsd_hash(struct inode *i) 143 { 144 return i->i_ino & UNIX_HASH_MOD; 145 } 146 147 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, 148 int addr_len, int type) 149 { 150 __wsum csum = csum_partial(sunaddr, addr_len, 0); 151 unsigned int hash; 152 153 hash = (__force unsigned int)csum_fold(csum); 154 hash ^= hash >> 8; 155 hash ^= type; 156 157 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); 158 } 159 160 static void unix_table_double_lock(struct net *net, 161 unsigned int hash1, unsigned int hash2) 162 { 163 if (hash1 == hash2) { 164 spin_lock(&net->unx.table.locks[hash1]); 165 return; 166 } 167 168 if (hash1 > hash2) 169 swap(hash1, hash2); 170 171 spin_lock(&net->unx.table.locks[hash1]); 172 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); 173 } 174 175 static void unix_table_double_unlock(struct net *net, 176 unsigned int hash1, unsigned int hash2) 177 { 178 if (hash1 == hash2) { 179 spin_unlock(&net->unx.table.locks[hash1]); 180 return; 181 } 182 183 spin_unlock(&net->unx.table.locks[hash1]); 184 spin_unlock(&net->unx.table.locks[hash2]); 185 } 186 187 #ifdef CONFIG_SECURITY_NETWORK 188 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 189 { 190 UNIXCB(skb).secid = scm->secid; 191 } 192 193 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 194 { 195 scm->secid = UNIXCB(skb).secid; 196 } 197 198 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 199 { 200 return (scm->secid == UNIXCB(skb).secid); 201 } 202 #else 203 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 204 { } 205 206 static inline void unix_set_secdata(struct scm_cookie *scm, 
struct sk_buff *skb) 207 { } 208 209 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 210 { 211 return true; 212 } 213 #endif /* CONFIG_SECURITY_NETWORK */ 214 215 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 216 { 217 return unix_peer(osk) == sk; 218 } 219 220 static inline int unix_may_send(struct sock *sk, struct sock *osk) 221 { 222 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 223 } 224 225 static inline int unix_recvq_full(const struct sock *sk) 226 { 227 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 228 } 229 230 static inline int unix_recvq_full_lockless(const struct sock *sk) 231 { 232 return skb_queue_len_lockless(&sk->sk_receive_queue) > 233 READ_ONCE(sk->sk_max_ack_backlog); 234 } 235 236 struct sock *unix_peer_get(struct sock *s) 237 { 238 struct sock *peer; 239 240 unix_state_lock(s); 241 peer = unix_peer(s); 242 if (peer) 243 sock_hold(peer); 244 unix_state_unlock(s); 245 return peer; 246 } 247 EXPORT_SYMBOL_GPL(unix_peer_get); 248 249 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 250 int addr_len) 251 { 252 struct unix_address *addr; 253 254 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 255 if (!addr) 256 return NULL; 257 258 refcount_set(&addr->refcnt, 1); 259 addr->len = addr_len; 260 memcpy(addr->name, sunaddr, addr_len); 261 262 return addr; 263 } 264 265 static inline void unix_release_addr(struct unix_address *addr) 266 { 267 if (refcount_dec_and_test(&addr->refcnt)) 268 kfree(addr); 269 } 270 271 /* 272 * Check unix socket name: 273 * - should be not zero length. 274 * - if started by not zero, should be NULL terminated (FS object) 275 * - if started by zero, it is abstract name. 
276 */ 277 278 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 279 { 280 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 281 addr_len > sizeof(*sunaddr)) 282 return -EINVAL; 283 284 if (sunaddr->sun_family != AF_UNIX) 285 return -EINVAL; 286 287 return 0; 288 } 289 290 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 291 { 292 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr; 293 short offset = offsetof(struct sockaddr_storage, __data); 294 295 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path)); 296 297 /* This may look like an off by one error but it is a bit more 298 * subtle. 108 is the longest valid AF_UNIX path for a binding. 299 * sun_path[108] doesn't as such exist. However in kernel space 300 * we are guaranteed that it is a valid memory location in our 301 * kernel address buffer because syscall functions always pass 302 * a pointer of struct sockaddr_storage which has a bigger buffer 303 * than 108. Also, we must terminate sun_path for strlen() in 304 * getname_kernel(). 305 */ 306 addr->__data[addr_len - offset] = 0; 307 308 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will 309 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen() 310 * know the actual buffer. 
311 */ 312 return strlen(addr->__data) + offset + 1; 313 } 314 315 static void __unix_remove_socket(struct sock *sk) 316 { 317 sk_del_node_init(sk); 318 } 319 320 static void __unix_insert_socket(struct net *net, struct sock *sk) 321 { 322 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 323 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 324 } 325 326 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 327 struct unix_address *addr, unsigned int hash) 328 { 329 __unix_remove_socket(sk); 330 smp_store_release(&unix_sk(sk)->addr, addr); 331 332 sk->sk_hash = hash; 333 __unix_insert_socket(net, sk); 334 } 335 336 static void unix_remove_socket(struct net *net, struct sock *sk) 337 { 338 spin_lock(&net->unx.table.locks[sk->sk_hash]); 339 __unix_remove_socket(sk); 340 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 341 } 342 343 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 344 { 345 spin_lock(&net->unx.table.locks[sk->sk_hash]); 346 __unix_insert_socket(net, sk); 347 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 348 } 349 350 static void unix_insert_bsd_socket(struct sock *sk) 351 { 352 spin_lock(&bsd_socket_locks[sk->sk_hash]); 353 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 354 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 355 } 356 357 static void unix_remove_bsd_socket(struct sock *sk) 358 { 359 if (!hlist_unhashed(&sk->sk_bind_node)) { 360 spin_lock(&bsd_socket_locks[sk->sk_hash]); 361 __sk_del_bind_node(sk); 362 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 363 364 sk_node_init(&sk->sk_bind_node); 365 } 366 } 367 368 static struct sock *__unix_find_socket_byname(struct net *net, 369 struct sockaddr_un *sunname, 370 int len, unsigned int hash) 371 { 372 struct sock *s; 373 374 sk_for_each(s, &net->unx.table.buckets[hash]) { 375 struct unix_sock *u = unix_sk(s); 376 377 if (u->addr->len == len && 378 !memcmp(u->addr->name, sunname, len)) 379 return s; 380 } 381 return NULL; 382 } 383 384 static inline struct 
sock *unix_find_socket_byname(struct net *net, 385 struct sockaddr_un *sunname, 386 int len, unsigned int hash) 387 { 388 struct sock *s; 389 390 spin_lock(&net->unx.table.locks[hash]); 391 s = __unix_find_socket_byname(net, sunname, len, hash); 392 if (s) 393 sock_hold(s); 394 spin_unlock(&net->unx.table.locks[hash]); 395 return s; 396 } 397 398 static struct sock *unix_find_socket_byinode(struct inode *i) 399 { 400 unsigned int hash = unix_bsd_hash(i); 401 struct sock *s; 402 403 spin_lock(&bsd_socket_locks[hash]); 404 sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 405 struct dentry *dentry = unix_sk(s)->path.dentry; 406 407 if (dentry && d_backing_inode(dentry) == i) { 408 sock_hold(s); 409 spin_unlock(&bsd_socket_locks[hash]); 410 return s; 411 } 412 } 413 spin_unlock(&bsd_socket_locks[hash]); 414 return NULL; 415 } 416 417 /* Support code for asymmetrically connected dgram sockets 418 * 419 * If a datagram socket is connected to a socket not itself connected 420 * to the first socket (eg, /dev/log), clients may only enqueue more 421 * messages if the present receive queue of the server socket is not 422 * "too large". This means there's a second writeability condition 423 * poll and sendmsg need to test. The dgram recv code will do a wake 424 * up on the peer_wait wait queue of a socket upon reception of a 425 * datagram which needs to be propagated to sleeping would-be writers 426 * since these might not have sent anything so far. 
This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hit the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

/* Wake function installed on a client's peer_wake entry (see
 * init_waitqueue_func_entry() in unix_create1()).  It unlinks the entry
 * from the peer_wait queue it sits on, clears peer_wake.private, and
 * forwards the wake-up to the client's own wait queue.  Always returns 0.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	/* q is the peer_wake member embedded in the client's unix_sock */
	u = container_of(q, struct unix_sock, peer_wake);

	/* peer_wake.private holds the sock whose peer_wait queue we are on */
	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

/* Enqueue sk's peer_wake entry on other's peer_wait queue, unless the
 * entry is already in use (peer_wake.private is set).
 *
 * Returns 1 if this call enqueued the entry, 0 if it was already queued.
 * The check and the enqueue happen under other's peer_wait.lock.
 */
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

/* Undo unix_dgram_peer_wake_connect(): if sk's peer_wake entry is queued
 * on other (peer_wake.private == other), unlink it and clear the marker.
 * Does nothing when the entry is idle or queued on a different socket.
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

/* Disconnect sk's relay entry from other and then wake sleepers on sk's
 * own wait queue with the write-ready poll bits.
 */
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 when other's receive queue is full and other is not dead; in
 * that case the relay entry stays queued on other.  Returns 0 otherwise,
 * after removing the entry again if this call was the one that queued it.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	/* Queue the relay first, then test the condition */
	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

/* Non-zero when sk is not listening and four times its sk_wmem_alloc
 * count does not exceed sk_sndbuf.
 */
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

/* sk_write_space callback (installed in unix_create1()): when the socket
 * is writable, wake sleepers on its wait queue with the write-ready poll
 * bits and send the SOCK_WAKE_SPACE async notification.  sk_wq is
 * dereferenced under rcu_read_lock().
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer.
First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* senders may be sleeping on our peer_wait queue */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			WRITE_ONCE(other->sk_err, ECONNRESET);
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}

/* sk_destruct callback (installed in unix_create1()).  Purges whatever is
 * left on the receive queue, releases the bound address and undoes the
 * socket accounting done at creation.  A socket that is not SOCK_DEAD is
 * reported and left alone.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	/* mirror of the accounting in unix_create1() */
	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

/* Tear down a socket: unhash it, orphan it, detach it from its peer and
 * its path, flush its receive queue and drop the caller's reference.
 *
 * @sk:      socket being released
 * @embrion: non-zero when sk is a not-yet-accepted connection found on a
 *           listener's queue (see the recursive call below); the peer then
 *           gets ECONNRESET even if sk's receive queue is empty.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	/* take over the path reference; it is dropped after unlocking */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	/* remember the old state: a listener's queue needs special care */
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			/* unread data, or an unaccepted connection, is a reset */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* on a listener each queued skb stands for a pending
		 * connection; release that socket as an embryo
		 */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */
}

/* Record the current task's thread-group pid and credentials as the peer
 * credentials of sk.  The swap is done under sk_peer_lock; the previous
 * pid/cred references are dropped after the lock is released.
 */
static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

/* Copy peersk's peer pid and credentials into sk, taking new references.
 * Both sk_peer_locks are held for the copy and are always acquired in
 * ascending pointer order; the old references of sk are dropped after
 * unlocking.
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

/* listen() for stream and seqpacket sockets.
 *
 * Returns 0 on success, -EOPNOTSUPP for any other socket type, and
 * -EINVAL when the socket is unbound or is neither closed nor already
 * listening.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* a larger backlog may unblock waiters on peer_wait */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

/* Forward declarations for the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

/* set_peek_off handler: store val in sk_peek_off while holding iolock.
 * Returns -EINTR if taking the mutex is interrupted, 0 otherwise.
 */
static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	WRITE_ONCE(sk->sk_peek_off, val);
	mutex_unlock(&u->iolock);

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Sum scm_stat.nr_fds over the sockets referenced by the skbs queued on
 * sk's receive queue, walking the queue under its lock.
 */
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

/* show_fdinfo handler: print "scm_fds: N".  N is the socket's own
 * scm_stat.nr_fds for dgram or established sockets, the sum computed by
 * unix_count_nr_fds() for listeners, and 0 in any other state.  Nothing
 * is printed when the socket has no sock attached.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif

/* Socket operations for SOCK_STREAM. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

/* Socket operations for SOCK_DGRAM (and SOCK_RAW, which unix_create()
 * turns into SOCK_DGRAM): no accept, no listen.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

/* Socket operations for SOCK_SEQPACKET: connection setup is shared with
 * the stream ops, polling with the dgram ops.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

/* ->close() of both struct proto objects; intentionally empty. */
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

/* ->unhash() of unix_stream_proto; intentionally empty. */
static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
926 */ 927 } 928 929 static bool unix_bpf_bypass_getsockopt(int level, int optname) 930 { 931 if (level == SOL_SOCKET) { 932 switch (optname) { 933 case SO_PEERPIDFD: 934 return true; 935 default: 936 return false; 937 } 938 } 939 940 return false; 941 } 942 943 struct proto unix_dgram_proto = { 944 .name = "UNIX", 945 .owner = THIS_MODULE, 946 .obj_size = sizeof(struct unix_sock), 947 .close = unix_close, 948 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, 949 #ifdef CONFIG_BPF_SYSCALL 950 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 951 #endif 952 }; 953 954 struct proto unix_stream_proto = { 955 .name = "UNIX-STREAM", 956 .owner = THIS_MODULE, 957 .obj_size = sizeof(struct unix_sock), 958 .close = unix_close, 959 .unhash = unix_unhash, 960 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, 961 #ifdef CONFIG_BPF_SYSCALL 962 .psock_update_sk_prot = unix_stream_bpf_update_proto, 963 #endif 964 }; 965 966 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 967 { 968 struct unix_sock *u; 969 struct sock *sk; 970 int err; 971 972 atomic_long_inc(&unix_nr_socks); 973 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 974 err = -ENFILE; 975 goto err; 976 } 977 978 if (type == SOCK_STREAM) 979 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 980 else /*dgram and seqpacket */ 981 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 982 983 if (!sk) { 984 err = -ENOMEM; 985 goto err; 986 } 987 988 sock_init_data(sock, sk); 989 990 sk->sk_hash = unix_unbound_hash(sk); 991 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 992 sk->sk_write_space = unix_write_space; 993 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 994 sk->sk_destruct = unix_sock_destructor; 995 u = unix_sk(sk); 996 u->path.dentry = NULL; 997 u->path.mnt = NULL; 998 spin_lock_init(&u->lock); 999 atomic_long_set(&u->inflight, 0); 1000 INIT_LIST_HEAD(&u->link); 1001 mutex_init(&u->iolock); /* single task reading lock 
*/ 1002 mutex_init(&u->bindlock); /* single task binding lock */ 1003 init_waitqueue_head(&u->peer_wait); 1004 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 1005 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 1006 unix_insert_unbound_socket(net, sk); 1007 1008 sock_prot_inuse_add(net, sk->sk_prot, 1); 1009 1010 return sk; 1011 1012 err: 1013 atomic_long_dec(&unix_nr_socks); 1014 return ERR_PTR(err); 1015 } 1016 1017 static int unix_create(struct net *net, struct socket *sock, int protocol, 1018 int kern) 1019 { 1020 struct sock *sk; 1021 1022 if (protocol && protocol != PF_UNIX) 1023 return -EPROTONOSUPPORT; 1024 1025 sock->state = SS_UNCONNECTED; 1026 1027 switch (sock->type) { 1028 case SOCK_STREAM: 1029 sock->ops = &unix_stream_ops; 1030 break; 1031 /* 1032 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 1033 * nothing uses it. 1034 */ 1035 case SOCK_RAW: 1036 sock->type = SOCK_DGRAM; 1037 fallthrough; 1038 case SOCK_DGRAM: 1039 sock->ops = &unix_dgram_ops; 1040 break; 1041 case SOCK_SEQPACKET: 1042 sock->ops = &unix_seqpacket_ops; 1043 break; 1044 default: 1045 return -ESOCKTNOSUPPORT; 1046 } 1047 1048 sk = unix_create1(net, sock, kern, sock->type); 1049 if (IS_ERR(sk)) 1050 return PTR_ERR(sk); 1051 1052 return 0; 1053 } 1054 1055 static int unix_release(struct socket *sock) 1056 { 1057 struct sock *sk = sock->sk; 1058 1059 if (!sk) 1060 return 0; 1061 1062 sk->sk_prot->close(sk, 0); 1063 unix_release_sock(sk, 0); 1064 sock->sk = NULL; 1065 1066 return 0; 1067 } 1068 1069 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1070 int type) 1071 { 1072 struct inode *inode; 1073 struct path path; 1074 struct sock *sk; 1075 int err; 1076 1077 unix_mkname_bsd(sunaddr, addr_len); 1078 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1079 if (err) 1080 goto fail; 1081 1082 err = path_permission(&path, MAY_WRITE); 1083 if (err) 1084 goto path_put; 1085 1086 err = -ECONNREFUSED; 1087 inode = 
d_backing_inode(path.dentry); 1088 if (!S_ISSOCK(inode->i_mode)) 1089 goto path_put; 1090 1091 sk = unix_find_socket_byinode(inode); 1092 if (!sk) 1093 goto path_put; 1094 1095 err = -EPROTOTYPE; 1096 if (sk->sk_type == type) 1097 touch_atime(&path); 1098 else 1099 goto sock_put; 1100 1101 path_put(&path); 1102 1103 return sk; 1104 1105 sock_put: 1106 sock_put(sk); 1107 path_put: 1108 path_put(&path); 1109 fail: 1110 return ERR_PTR(err); 1111 } 1112 1113 static struct sock *unix_find_abstract(struct net *net, 1114 struct sockaddr_un *sunaddr, 1115 int addr_len, int type) 1116 { 1117 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1118 struct dentry *dentry; 1119 struct sock *sk; 1120 1121 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1122 if (!sk) 1123 return ERR_PTR(-ECONNREFUSED); 1124 1125 dentry = unix_sk(sk)->path.dentry; 1126 if (dentry) 1127 touch_atime(&unix_sk(sk)->path); 1128 1129 return sk; 1130 } 1131 1132 static struct sock *unix_find_other(struct net *net, 1133 struct sockaddr_un *sunaddr, 1134 int addr_len, int type) 1135 { 1136 struct sock *sk; 1137 1138 if (sunaddr->sun_path[0]) 1139 sk = unix_find_bsd(sunaddr, addr_len, type); 1140 else 1141 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1142 1143 return sk; 1144 } 1145 1146 static int unix_autobind(struct sock *sk) 1147 { 1148 unsigned int new_hash, old_hash = sk->sk_hash; 1149 struct unix_sock *u = unix_sk(sk); 1150 struct net *net = sock_net(sk); 1151 struct unix_address *addr; 1152 u32 lastnum, ordernum; 1153 int err; 1154 1155 err = mutex_lock_interruptible(&u->bindlock); 1156 if (err) 1157 return err; 1158 1159 if (u->addr) 1160 goto out; 1161 1162 err = -ENOMEM; 1163 addr = kzalloc(sizeof(*addr) + 1164 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1165 if (!addr) 1166 goto out; 1167 1168 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1169 addr->name->sun_family = AF_UNIX; 1170 refcount_set(&addr->refcnt, 1); 1171 1172 ordernum = 
get_random_u32(); 1173 lastnum = ordernum & 0xFFFFF; 1174 retry: 1175 ordernum = (ordernum + 1) & 0xFFFFF; 1176 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1177 1178 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1179 unix_table_double_lock(net, old_hash, new_hash); 1180 1181 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1182 unix_table_double_unlock(net, old_hash, new_hash); 1183 1184 /* __unix_find_socket_byname() may take long time if many names 1185 * are already in use. 1186 */ 1187 cond_resched(); 1188 1189 if (ordernum == lastnum) { 1190 /* Give up if all names seems to be in use. */ 1191 err = -ENOSPC; 1192 unix_release_addr(addr); 1193 goto out; 1194 } 1195 1196 goto retry; 1197 } 1198 1199 __unix_set_addr_hash(net, sk, addr, new_hash); 1200 unix_table_double_unlock(net, old_hash, new_hash); 1201 err = 0; 1202 1203 out: mutex_unlock(&u->bindlock); 1204 return err; 1205 } 1206 1207 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1208 int addr_len) 1209 { 1210 umode_t mode = S_IFSOCK | 1211 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1212 unsigned int new_hash, old_hash = sk->sk_hash; 1213 struct unix_sock *u = unix_sk(sk); 1214 struct net *net = sock_net(sk); 1215 struct mnt_idmap *idmap; 1216 struct unix_address *addr; 1217 struct dentry *dentry; 1218 struct path parent; 1219 int err; 1220 1221 addr_len = unix_mkname_bsd(sunaddr, addr_len); 1222 addr = unix_create_addr(sunaddr, addr_len); 1223 if (!addr) 1224 return -ENOMEM; 1225 1226 /* 1227 * Get the parent directory, calculate the hash for last 1228 * component. 1229 */ 1230 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1231 if (IS_ERR(dentry)) { 1232 err = PTR_ERR(dentry); 1233 goto out; 1234 } 1235 1236 /* 1237 * All right, let's create it. 
1238 */ 1239 idmap = mnt_idmap(parent.mnt); 1240 err = security_path_mknod(&parent, dentry, mode, 0); 1241 if (!err) 1242 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); 1243 if (err) 1244 goto out_path; 1245 err = mutex_lock_interruptible(&u->bindlock); 1246 if (err) 1247 goto out_unlink; 1248 if (u->addr) 1249 goto out_unlock; 1250 1251 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1252 unix_table_double_lock(net, old_hash, new_hash); 1253 u->path.mnt = mntget(parent.mnt); 1254 u->path.dentry = dget(dentry); 1255 __unix_set_addr_hash(net, sk, addr, new_hash); 1256 unix_table_double_unlock(net, old_hash, new_hash); 1257 unix_insert_bsd_socket(sk); 1258 mutex_unlock(&u->bindlock); 1259 done_path_create(&parent, dentry); 1260 return 0; 1261 1262 out_unlock: 1263 mutex_unlock(&u->bindlock); 1264 err = -EINVAL; 1265 out_unlink: 1266 /* failed after successful mknod? unlink what we'd created... */ 1267 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL); 1268 out_path: 1269 done_path_create(&parent, dentry); 1270 out: 1271 unix_release_addr(addr); 1272 return err == -EEXIST ? 
-EADDRINUSE : err; 1273 } 1274 1275 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1276 int addr_len) 1277 { 1278 unsigned int new_hash, old_hash = sk->sk_hash; 1279 struct unix_sock *u = unix_sk(sk); 1280 struct net *net = sock_net(sk); 1281 struct unix_address *addr; 1282 int err; 1283 1284 addr = unix_create_addr(sunaddr, addr_len); 1285 if (!addr) 1286 return -ENOMEM; 1287 1288 err = mutex_lock_interruptible(&u->bindlock); 1289 if (err) 1290 goto out; 1291 1292 if (u->addr) { 1293 err = -EINVAL; 1294 goto out_mutex; 1295 } 1296 1297 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1298 unix_table_double_lock(net, old_hash, new_hash); 1299 1300 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1301 goto out_spin; 1302 1303 __unix_set_addr_hash(net, sk, addr, new_hash); 1304 unix_table_double_unlock(net, old_hash, new_hash); 1305 mutex_unlock(&u->bindlock); 1306 return 0; 1307 1308 out_spin: 1309 unix_table_double_unlock(net, old_hash, new_hash); 1310 err = -EADDRINUSE; 1311 out_mutex: 1312 mutex_unlock(&u->bindlock); 1313 out: 1314 unix_release_addr(addr); 1315 return err; 1316 } 1317 1318 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1319 { 1320 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1321 struct sock *sk = sock->sk; 1322 int err; 1323 1324 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1325 sunaddr->sun_family == AF_UNIX) 1326 return unix_autobind(sk); 1327 1328 err = unix_validate_addr(sunaddr, addr_len); 1329 if (err) 1330 return err; 1331 1332 if (sunaddr->sun_path[0]) 1333 err = unix_bind_bsd(sk, sunaddr, addr_len); 1334 else 1335 err = unix_bind_abstract(sk, sunaddr, addr_len); 1336 1337 return err; 1338 } 1339 1340 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1341 { 1342 if (unlikely(sk1 == sk2) || !sk2) { 1343 unix_state_lock(sk1); 1344 return; 1345 } 1346 if (sk1 < sk2) { 1347 unix_state_lock(sk1); 
1348 unix_state_lock_nested(sk2); 1349 } else { 1350 unix_state_lock(sk2); 1351 unix_state_lock_nested(sk1); 1352 } 1353 } 1354 1355 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1356 { 1357 if (unlikely(sk1 == sk2) || !sk2) { 1358 unix_state_unlock(sk1); 1359 return; 1360 } 1361 unix_state_unlock(sk1); 1362 unix_state_unlock(sk2); 1363 } 1364 1365 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1366 int alen, int flags) 1367 { 1368 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1369 struct sock *sk = sock->sk; 1370 struct sock *other; 1371 int err; 1372 1373 err = -EINVAL; 1374 if (alen < offsetofend(struct sockaddr, sa_family)) 1375 goto out; 1376 1377 if (addr->sa_family != AF_UNSPEC) { 1378 err = unix_validate_addr(sunaddr, alen); 1379 if (err) 1380 goto out; 1381 1382 if ((test_bit(SOCK_PASSCRED, &sock->flags) || 1383 test_bit(SOCK_PASSPIDFD, &sock->flags)) && 1384 !unix_sk(sk)->addr) { 1385 err = unix_autobind(sk); 1386 if (err) 1387 goto out; 1388 } 1389 1390 restart: 1391 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1392 if (IS_ERR(other)) { 1393 err = PTR_ERR(other); 1394 goto out; 1395 } 1396 1397 unix_state_double_lock(sk, other); 1398 1399 /* Apparently VFS overslept socket death. Retry. */ 1400 if (sock_flag(other, SOCK_DEAD)) { 1401 unix_state_double_unlock(sk, other); 1402 sock_put(other); 1403 goto restart; 1404 } 1405 1406 err = -EPERM; 1407 if (!unix_may_send(sk, other)) 1408 goto out_unlock; 1409 1410 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1411 if (err) 1412 goto out_unlock; 1413 1414 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1415 } else { 1416 /* 1417 * 1003.1g breaking connected state with AF_UNSPEC 1418 */ 1419 other = NULL; 1420 unix_state_double_lock(sk, other); 1421 } 1422 1423 /* 1424 * If it was connected, reconnect. 
1425 */ 1426 if (unix_peer(sk)) { 1427 struct sock *old_peer = unix_peer(sk); 1428 1429 unix_peer(sk) = other; 1430 if (!other) 1431 sk->sk_state = TCP_CLOSE; 1432 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1433 1434 unix_state_double_unlock(sk, other); 1435 1436 if (other != old_peer) 1437 unix_dgram_disconnected(sk, old_peer); 1438 sock_put(old_peer); 1439 } else { 1440 unix_peer(sk) = other; 1441 unix_state_double_unlock(sk, other); 1442 } 1443 1444 return 0; 1445 1446 out_unlock: 1447 unix_state_double_unlock(sk, other); 1448 sock_put(other); 1449 out: 1450 return err; 1451 } 1452 1453 static long unix_wait_for_peer(struct sock *other, long timeo) 1454 __releases(&unix_sk(other)->lock) 1455 { 1456 struct unix_sock *u = unix_sk(other); 1457 int sched; 1458 DEFINE_WAIT(wait); 1459 1460 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1461 1462 sched = !sock_flag(other, SOCK_DEAD) && 1463 !(other->sk_shutdown & RCV_SHUTDOWN) && 1464 unix_recvq_full_lockless(other); 1465 1466 unix_state_unlock(other); 1467 1468 if (sched) 1469 timeo = schedule_timeout(timeo); 1470 1471 finish_wait(&u->peer_wait, &wait); 1472 return timeo; 1473 } 1474 1475 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1476 int addr_len, int flags) 1477 { 1478 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1479 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1480 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1481 struct net *net = sock_net(sk); 1482 struct sk_buff *skb = NULL; 1483 long timeo; 1484 int err; 1485 int st; 1486 1487 err = unix_validate_addr(sunaddr, addr_len); 1488 if (err) 1489 goto out; 1490 1491 if ((test_bit(SOCK_PASSCRED, &sock->flags) || 1492 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { 1493 err = unix_autobind(sk); 1494 if (err) 1495 goto out; 1496 } 1497 1498 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1499 1500 /* First of all allocate resources. 
1501 If we will make it after state is locked, 1502 we will have to recheck all again in any case. 1503 */ 1504 1505 /* create new sock for complete connection */ 1506 newsk = unix_create1(net, NULL, 0, sock->type); 1507 if (IS_ERR(newsk)) { 1508 err = PTR_ERR(newsk); 1509 newsk = NULL; 1510 goto out; 1511 } 1512 1513 err = -ENOMEM; 1514 1515 /* Allocate skb for sending to listening sock */ 1516 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1517 if (skb == NULL) 1518 goto out; 1519 1520 restart: 1521 /* Find listening sock. */ 1522 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1523 if (IS_ERR(other)) { 1524 err = PTR_ERR(other); 1525 other = NULL; 1526 goto out; 1527 } 1528 1529 /* Latch state of peer */ 1530 unix_state_lock(other); 1531 1532 /* Apparently VFS overslept socket death. Retry. */ 1533 if (sock_flag(other, SOCK_DEAD)) { 1534 unix_state_unlock(other); 1535 sock_put(other); 1536 goto restart; 1537 } 1538 1539 err = -ECONNREFUSED; 1540 if (other->sk_state != TCP_LISTEN) 1541 goto out_unlock; 1542 if (other->sk_shutdown & RCV_SHUTDOWN) 1543 goto out_unlock; 1544 1545 if (unix_recvq_full(other)) { 1546 err = -EAGAIN; 1547 if (!timeo) 1548 goto out_unlock; 1549 1550 timeo = unix_wait_for_peer(other, timeo); 1551 1552 err = sock_intr_errno(timeo); 1553 if (signal_pending(current)) 1554 goto out; 1555 sock_put(other); 1556 goto restart; 1557 } 1558 1559 /* Latch our state. 1560 1561 It is tricky place. We need to grab our state lock and cannot 1562 drop lock on peer. It is dangerous because deadlock is 1563 possible. Connect to self case and simultaneous 1564 attempt to connect are eliminated by checking socket 1565 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1566 check this before attempt to grab lock. 1567 1568 Well, and we have to recheck the state after socket locked. 1569 */ 1570 st = sk->sk_state; 1571 1572 switch (st) { 1573 case TCP_CLOSE: 1574 /* This is ok... 
continue with connect */ 1575 break; 1576 case TCP_ESTABLISHED: 1577 /* Socket is already connected */ 1578 err = -EISCONN; 1579 goto out_unlock; 1580 default: 1581 err = -EINVAL; 1582 goto out_unlock; 1583 } 1584 1585 unix_state_lock_nested(sk); 1586 1587 if (sk->sk_state != st) { 1588 unix_state_unlock(sk); 1589 unix_state_unlock(other); 1590 sock_put(other); 1591 goto restart; 1592 } 1593 1594 err = security_unix_stream_connect(sk, other, newsk); 1595 if (err) { 1596 unix_state_unlock(sk); 1597 goto out_unlock; 1598 } 1599 1600 /* The way is open! Fastly set all the necessary fields... */ 1601 1602 sock_hold(sk); 1603 unix_peer(newsk) = sk; 1604 newsk->sk_state = TCP_ESTABLISHED; 1605 newsk->sk_type = sk->sk_type; 1606 init_peercred(newsk); 1607 newu = unix_sk(newsk); 1608 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1609 otheru = unix_sk(other); 1610 1611 /* copy address information from listening to new sock 1612 * 1613 * The contents of *(otheru->addr) and otheru->path 1614 * are seen fully set up here, since we have found 1615 * otheru in hash under its lock. Insertion into the 1616 * hash chain we'd found it in had been done in an 1617 * earlier critical area protected by the chain's lock, 1618 * the same one where we'd set *(otheru->addr) contents, 1619 * as well as otheru->path and otheru->addr itself. 1620 * 1621 * Using smp_store_release() here to set newu->addr 1622 * is enough to make those stores, as well as stores 1623 * to newu->path visible to anyone who gets newu->addr 1624 * by smp_load_acquire(). IOW, the same warranties 1625 * as for unix_sock instances bound in unix_bind() or 1626 * in unix_autobind(). 
1627 */ 1628 if (otheru->path.dentry) { 1629 path_get(&otheru->path); 1630 newu->path = otheru->path; 1631 } 1632 refcount_inc(&otheru->addr->refcnt); 1633 smp_store_release(&newu->addr, otheru->addr); 1634 1635 /* Set credentials */ 1636 copy_peercred(sk, other); 1637 1638 sock->state = SS_CONNECTED; 1639 sk->sk_state = TCP_ESTABLISHED; 1640 sock_hold(newsk); 1641 1642 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1643 unix_peer(sk) = newsk; 1644 1645 unix_state_unlock(sk); 1646 1647 /* take ten and send info to listening sock */ 1648 spin_lock(&other->sk_receive_queue.lock); 1649 __skb_queue_tail(&other->sk_receive_queue, skb); 1650 spin_unlock(&other->sk_receive_queue.lock); 1651 unix_state_unlock(other); 1652 other->sk_data_ready(other); 1653 sock_put(other); 1654 return 0; 1655 1656 out_unlock: 1657 if (other) 1658 unix_state_unlock(other); 1659 1660 out: 1661 kfree_skb(skb); 1662 if (newsk) 1663 unix_release_sock(newsk, 0); 1664 if (other) 1665 sock_put(other); 1666 return err; 1667 } 1668 1669 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1670 { 1671 struct sock *ska = socka->sk, *skb = sockb->sk; 1672 1673 /* Join our sockets back to back */ 1674 sock_hold(ska); 1675 sock_hold(skb); 1676 unix_peer(ska) = skb; 1677 unix_peer(skb) = ska; 1678 init_peercred(ska); 1679 init_peercred(skb); 1680 1681 ska->sk_state = TCP_ESTABLISHED; 1682 skb->sk_state = TCP_ESTABLISHED; 1683 socka->state = SS_CONNECTED; 1684 sockb->state = SS_CONNECTED; 1685 return 0; 1686 } 1687 1688 static void unix_sock_inherit_flags(const struct socket *old, 1689 struct socket *new) 1690 { 1691 if (test_bit(SOCK_PASSCRED, &old->flags)) 1692 set_bit(SOCK_PASSCRED, &new->flags); 1693 if (test_bit(SOCK_PASSPIDFD, &old->flags)) 1694 set_bit(SOCK_PASSPIDFD, &new->flags); 1695 if (test_bit(SOCK_PASSSEC, &old->flags)) 1696 set_bit(SOCK_PASSSEC, &new->flags); 1697 } 1698 1699 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1700 
bool kern) 1701 { 1702 struct sock *sk = sock->sk; 1703 struct sock *tsk; 1704 struct sk_buff *skb; 1705 int err; 1706 1707 err = -EOPNOTSUPP; 1708 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1709 goto out; 1710 1711 err = -EINVAL; 1712 if (sk->sk_state != TCP_LISTEN) 1713 goto out; 1714 1715 /* If socket state is TCP_LISTEN it cannot change (for now...), 1716 * so that no locks are necessary. 1717 */ 1718 1719 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1720 &err); 1721 if (!skb) { 1722 /* This means receive shutdown. */ 1723 if (err == 0) 1724 err = -EINVAL; 1725 goto out; 1726 } 1727 1728 tsk = skb->sk; 1729 skb_free_datagram(sk, skb); 1730 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1731 1732 /* attach accepted sock to socket */ 1733 unix_state_lock(tsk); 1734 newsock->state = SS_CONNECTED; 1735 unix_sock_inherit_flags(sock, newsock); 1736 sock_graft(tsk, newsock); 1737 unix_state_unlock(tsk); 1738 return 0; 1739 1740 out: 1741 return err; 1742 } 1743 1744 1745 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1746 { 1747 struct sock *sk = sock->sk; 1748 struct unix_address *addr; 1749 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1750 int err = 0; 1751 1752 if (peer) { 1753 sk = unix_peer_get(sk); 1754 1755 err = -ENOTCONN; 1756 if (!sk) 1757 goto out; 1758 err = 0; 1759 } else { 1760 sock_hold(sk); 1761 } 1762 1763 addr = smp_load_acquire(&unix_sk(sk)->addr); 1764 if (!addr) { 1765 sunaddr->sun_family = AF_UNIX; 1766 sunaddr->sun_path[0] = 0; 1767 err = offsetof(struct sockaddr_un, sun_path); 1768 } else { 1769 err = addr->len; 1770 memcpy(sunaddr, addr->name, addr->len); 1771 } 1772 sock_put(sk); 1773 out: 1774 return err; 1775 } 1776 1777 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1778 { 1779 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1780 1781 /* 1782 * Garbage collection of unix sockets starts by selecting a set of 1783 * candidate sockets which 
have reference only from being in flight 1784 * (total_refs == inflight_refs). This condition is checked once during 1785 * the candidate collection phase, and candidates are marked as such, so 1786 * that non-candidates can later be ignored. While inflight_refs is 1787 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1788 * is an instantaneous decision. 1789 * 1790 * Once a candidate, however, the socket must not be reinstalled into a 1791 * file descriptor while the garbage collection is in progress. 1792 * 1793 * If the above conditions are met, then the directed graph of 1794 * candidates (*) does not change while unix_gc_lock is held. 1795 * 1796 * Any operations that changes the file count through file descriptors 1797 * (dup, close, sendmsg) does not change the graph since candidates are 1798 * not installed in fds. 1799 * 1800 * Dequeing a candidate via recvmsg would install it into an fd, but 1801 * that takes unix_gc_lock to decrement the inflight count, so it's 1802 * serialized with garbage collection. 1803 * 1804 * MSG_PEEK is special in that it does not change the inflight count, 1805 * yet does install the socket into an fd. The following lock/unlock 1806 * pair is to ensure serialization with garbage collection. It must be 1807 * done between incrementing the file count and installing the file into 1808 * an fd. 1809 * 1810 * If garbage collection starts after the barrier provided by the 1811 * lock/unlock, then it will see the elevated refcount and not mark this 1812 * as a candidate. If a garbage collection is already in progress 1813 * before the file count was incremented, then the lock/unlock pair will 1814 * ensure that garbage collection is finished before progressing to 1815 * installing the fd. 1816 * 1817 * (*) A -> B where B is on the queue of A or B is on the queue of C 1818 * which is on the queue of listening socket A. 
1819 */ 1820 spin_lock(&unix_gc_lock); 1821 spin_unlock(&unix_gc_lock); 1822 } 1823 1824 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1825 { 1826 int err = 0; 1827 1828 UNIXCB(skb).pid = get_pid(scm->pid); 1829 UNIXCB(skb).uid = scm->creds.uid; 1830 UNIXCB(skb).gid = scm->creds.gid; 1831 UNIXCB(skb).fp = NULL; 1832 unix_get_secdata(scm, skb); 1833 if (scm->fp && send_fds) 1834 err = unix_attach_fds(scm, skb); 1835 1836 skb->destructor = unix_destruct_scm; 1837 return err; 1838 } 1839 1840 static bool unix_passcred_enabled(const struct socket *sock, 1841 const struct sock *other) 1842 { 1843 return test_bit(SOCK_PASSCRED, &sock->flags) || 1844 test_bit(SOCK_PASSPIDFD, &sock->flags) || 1845 !other->sk_socket || 1846 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || 1847 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); 1848 } 1849 1850 /* 1851 * Some apps rely on write() giving SCM_CREDENTIALS 1852 * We include credentials if source or destination socket 1853 * asserted SOCK_PASSCRED. 
1854 */ 1855 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1856 const struct sock *other) 1857 { 1858 if (UNIXCB(skb).pid) 1859 return; 1860 if (unix_passcred_enabled(sock, other)) { 1861 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1862 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1863 } 1864 } 1865 1866 static bool unix_skb_scm_eq(struct sk_buff *skb, 1867 struct scm_cookie *scm) 1868 { 1869 return UNIXCB(skb).pid == scm->pid && 1870 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1871 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1872 unix_secdata_eq(scm, skb); 1873 } 1874 1875 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1876 { 1877 struct scm_fp_list *fp = UNIXCB(skb).fp; 1878 struct unix_sock *u = unix_sk(sk); 1879 1880 if (unlikely(fp && fp->count)) 1881 atomic_add(fp->count, &u->scm_stat.nr_fds); 1882 } 1883 1884 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1885 { 1886 struct scm_fp_list *fp = UNIXCB(skb).fp; 1887 struct unix_sock *u = unix_sk(sk); 1888 1889 if (unlikely(fp && fp->count)) 1890 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1891 } 1892 1893 /* 1894 * Send AF_UNIX data. 
1895 */ 1896 1897 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1898 size_t len) 1899 { 1900 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1901 struct sock *sk = sock->sk, *other = NULL; 1902 struct unix_sock *u = unix_sk(sk); 1903 struct scm_cookie scm; 1904 struct sk_buff *skb; 1905 int data_len = 0; 1906 int sk_locked; 1907 long timeo; 1908 int err; 1909 1910 wait_for_unix_gc(); 1911 err = scm_send(sock, msg, &scm, false); 1912 if (err < 0) 1913 return err; 1914 1915 err = -EOPNOTSUPP; 1916 if (msg->msg_flags&MSG_OOB) 1917 goto out; 1918 1919 if (msg->msg_namelen) { 1920 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1921 if (err) 1922 goto out; 1923 } else { 1924 sunaddr = NULL; 1925 err = -ENOTCONN; 1926 other = unix_peer_get(sk); 1927 if (!other) 1928 goto out; 1929 } 1930 1931 if ((test_bit(SOCK_PASSCRED, &sock->flags) || 1932 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { 1933 err = unix_autobind(sk); 1934 if (err) 1935 goto out; 1936 } 1937 1938 err = -EMSGSIZE; 1939 if (len > sk->sk_sndbuf - 32) 1940 goto out; 1941 1942 if (len > SKB_MAX_ALLOC) { 1943 data_len = min_t(size_t, 1944 len - SKB_MAX_ALLOC, 1945 MAX_SKB_FRAGS * PAGE_SIZE); 1946 data_len = PAGE_ALIGN(data_len); 1947 1948 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1949 } 1950 1951 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1952 msg->msg_flags & MSG_DONTWAIT, &err, 1953 PAGE_ALLOC_COSTLY_ORDER); 1954 if (skb == NULL) 1955 goto out; 1956 1957 err = unix_scm_to_skb(&scm, skb, true); 1958 if (err < 0) 1959 goto out_free; 1960 1961 skb_put(skb, len - data_len); 1962 skb->data_len = data_len; 1963 skb->len = len; 1964 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1965 if (err) 1966 goto out_free; 1967 1968 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1969 1970 restart: 1971 if (!other) { 1972 err = -ECONNRESET; 1973 if (sunaddr == NULL) 1974 goto out_free; 1975 1976 other = unix_find_other(sock_net(sk), 
sunaddr, msg->msg_namelen, 1977 sk->sk_type); 1978 if (IS_ERR(other)) { 1979 err = PTR_ERR(other); 1980 other = NULL; 1981 goto out_free; 1982 } 1983 } 1984 1985 if (sk_filter(other, skb) < 0) { 1986 /* Toss the packet but do not return any error to the sender */ 1987 err = len; 1988 goto out_free; 1989 } 1990 1991 sk_locked = 0; 1992 unix_state_lock(other); 1993 restart_locked: 1994 err = -EPERM; 1995 if (!unix_may_send(sk, other)) 1996 goto out_unlock; 1997 1998 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1999 /* 2000 * Check with 1003.1g - what should 2001 * datagram error 2002 */ 2003 unix_state_unlock(other); 2004 sock_put(other); 2005 2006 if (!sk_locked) 2007 unix_state_lock(sk); 2008 2009 err = 0; 2010 if (sk->sk_type == SOCK_SEQPACKET) { 2011 /* We are here only when racing with unix_release_sock() 2012 * is clearing @other. Never change state to TCP_CLOSE 2013 * unlike SOCK_DGRAM wants. 2014 */ 2015 unix_state_unlock(sk); 2016 err = -EPIPE; 2017 } else if (unix_peer(sk) == other) { 2018 unix_peer(sk) = NULL; 2019 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2020 2021 sk->sk_state = TCP_CLOSE; 2022 unix_state_unlock(sk); 2023 2024 unix_dgram_disconnected(sk, other); 2025 sock_put(other); 2026 err = -ECONNREFUSED; 2027 } else { 2028 unix_state_unlock(sk); 2029 } 2030 2031 other = NULL; 2032 if (err) 2033 goto out_free; 2034 goto restart; 2035 } 2036 2037 err = -EPIPE; 2038 if (other->sk_shutdown & RCV_SHUTDOWN) 2039 goto out_unlock; 2040 2041 if (sk->sk_type != SOCK_SEQPACKET) { 2042 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2043 if (err) 2044 goto out_unlock; 2045 } 2046 2047 /* other == sk && unix_peer(other) != sk if 2048 * - unix_peer(sk) == NULL, destination address bound to sk 2049 * - unix_peer(sk) == sk by time of get but disconnected before lock 2050 */ 2051 if (other != sk && 2052 unlikely(unix_peer(other) != sk && 2053 unix_recvq_full_lockless(other))) { 2054 if (timeo) { 2055 timeo = unix_wait_for_peer(other, timeo); 
2056 2057 err = sock_intr_errno(timeo); 2058 if (signal_pending(current)) 2059 goto out_free; 2060 2061 goto restart; 2062 } 2063 2064 if (!sk_locked) { 2065 unix_state_unlock(other); 2066 unix_state_double_lock(sk, other); 2067 } 2068 2069 if (unix_peer(sk) != other || 2070 unix_dgram_peer_wake_me(sk, other)) { 2071 err = -EAGAIN; 2072 sk_locked = 1; 2073 goto out_unlock; 2074 } 2075 2076 if (!sk_locked) { 2077 sk_locked = 1; 2078 goto restart_locked; 2079 } 2080 } 2081 2082 if (unlikely(sk_locked)) 2083 unix_state_unlock(sk); 2084 2085 if (sock_flag(other, SOCK_RCVTSTAMP)) 2086 __net_timestamp(skb); 2087 maybe_add_creds(skb, sock, other); 2088 scm_stat_add(other, skb); 2089 skb_queue_tail(&other->sk_receive_queue, skb); 2090 unix_state_unlock(other); 2091 other->sk_data_ready(other); 2092 sock_put(other); 2093 scm_destroy(&scm); 2094 return len; 2095 2096 out_unlock: 2097 if (sk_locked) 2098 unix_state_unlock(sk); 2099 unix_state_unlock(other); 2100 out_free: 2101 kfree_skb(skb); 2102 out: 2103 if (other) 2104 sock_put(other); 2105 scm_destroy(&scm); 2106 return err; 2107 } 2108 2109 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2110 * bytes, and a minimum of a full page. 
2111 */ 2112 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2113 2114 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2115 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, 2116 struct scm_cookie *scm, bool fds_sent) 2117 { 2118 struct unix_sock *ousk = unix_sk(other); 2119 struct sk_buff *skb; 2120 int err = 0; 2121 2122 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2123 2124 if (!skb) 2125 return err; 2126 2127 err = unix_scm_to_skb(scm, skb, !fds_sent); 2128 if (err < 0) { 2129 kfree_skb(skb); 2130 return err; 2131 } 2132 skb_put(skb, 1); 2133 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2134 2135 if (err) { 2136 kfree_skb(skb); 2137 return err; 2138 } 2139 2140 unix_state_lock(other); 2141 2142 if (sock_flag(other, SOCK_DEAD) || 2143 (other->sk_shutdown & RCV_SHUTDOWN)) { 2144 unix_state_unlock(other); 2145 kfree_skb(skb); 2146 return -EPIPE; 2147 } 2148 2149 maybe_add_creds(skb, sock, other); 2150 skb_get(skb); 2151 2152 if (ousk->oob_skb) 2153 consume_skb(ousk->oob_skb); 2154 2155 WRITE_ONCE(ousk->oob_skb, skb); 2156 2157 scm_stat_add(other, skb); 2158 skb_queue_tail(&other->sk_receive_queue, skb); 2159 sk_send_sigurg(other); 2160 unix_state_unlock(other); 2161 other->sk_data_ready(other); 2162 2163 return err; 2164 } 2165 #endif 2166 2167 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2168 size_t len) 2169 { 2170 struct sock *sk = sock->sk; 2171 struct sock *other = NULL; 2172 int err, size; 2173 struct sk_buff *skb; 2174 int sent = 0; 2175 struct scm_cookie scm; 2176 bool fds_sent = false; 2177 int data_len; 2178 2179 wait_for_unix_gc(); 2180 err = scm_send(sock, msg, &scm, false); 2181 if (err < 0) 2182 return err; 2183 2184 err = -EOPNOTSUPP; 2185 if (msg->msg_flags & MSG_OOB) { 2186 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2187 if (len) 2188 len--; 2189 else 2190 #endif 2191 goto out_err; 2192 } 2193 2194 if (msg->msg_namelen) { 2195 err = sk->sk_state == 
TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 2196 goto out_err; 2197 } else { 2198 err = -ENOTCONN; 2199 other = unix_peer(sk); 2200 if (!other) 2201 goto out_err; 2202 } 2203 2204 if (sk->sk_shutdown & SEND_SHUTDOWN) 2205 goto pipe_err; 2206 2207 while (sent < len) { 2208 size = len - sent; 2209 2210 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2211 skb = sock_alloc_send_pskb(sk, 0, 0, 2212 msg->msg_flags & MSG_DONTWAIT, 2213 &err, 0); 2214 } else { 2215 /* Keep two messages in the pipe so it schedules better */ 2216 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2217 2218 /* allow fallback to order-0 allocations */ 2219 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2220 2221 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2222 2223 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2224 2225 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2226 msg->msg_flags & MSG_DONTWAIT, &err, 2227 get_order(UNIX_SKB_FRAGS_SZ)); 2228 } 2229 if (!skb) 2230 goto out_err; 2231 2232 /* Only send the fds in the first buffer */ 2233 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2234 if (err < 0) { 2235 kfree_skb(skb); 2236 goto out_err; 2237 } 2238 fds_sent = true; 2239 2240 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2241 err = skb_splice_from_iter(skb, &msg->msg_iter, size, 2242 sk->sk_allocation); 2243 if (err < 0) { 2244 kfree_skb(skb); 2245 goto out_err; 2246 } 2247 size = err; 2248 refcount_add(size, &sk->sk_wmem_alloc); 2249 } else { 2250 skb_put(skb, size - data_len); 2251 skb->data_len = data_len; 2252 skb->len = size; 2253 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2254 if (err) { 2255 kfree_skb(skb); 2256 goto out_err; 2257 } 2258 } 2259 2260 unix_state_lock(other); 2261 2262 if (sock_flag(other, SOCK_DEAD) || 2263 (other->sk_shutdown & RCV_SHUTDOWN)) 2264 goto pipe_err_free; 2265 2266 maybe_add_creds(skb, sock, other); 2267 scm_stat_add(other, skb); 2268 skb_queue_tail(&other->sk_receive_queue, skb); 2269 
unix_state_unlock(other); 2270 other->sk_data_ready(other); 2271 sent += size; 2272 } 2273 2274 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2275 if (msg->msg_flags & MSG_OOB) { 2276 err = queue_oob(sock, msg, other, &scm, fds_sent); 2277 if (err) 2278 goto out_err; 2279 sent++; 2280 } 2281 #endif 2282 2283 scm_destroy(&scm); 2284 2285 return sent; 2286 2287 pipe_err_free: 2288 unix_state_unlock(other); 2289 kfree_skb(skb); 2290 pipe_err: 2291 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2292 send_sig(SIGPIPE, current, 0); 2293 err = -EPIPE; 2294 out_err: 2295 scm_destroy(&scm); 2296 return sent ? : err; 2297 } 2298 2299 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2300 size_t len) 2301 { 2302 int err; 2303 struct sock *sk = sock->sk; 2304 2305 err = sock_error(sk); 2306 if (err) 2307 return err; 2308 2309 if (sk->sk_state != TCP_ESTABLISHED) 2310 return -ENOTCONN; 2311 2312 if (msg->msg_namelen) 2313 msg->msg_namelen = 0; 2314 2315 return unix_dgram_sendmsg(sock, msg, len); 2316 } 2317 2318 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2319 size_t size, int flags) 2320 { 2321 struct sock *sk = sock->sk; 2322 2323 if (sk->sk_state != TCP_ESTABLISHED) 2324 return -ENOTCONN; 2325 2326 return unix_dgram_recvmsg(sock, msg, size, flags); 2327 } 2328 2329 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2330 { 2331 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2332 2333 if (addr) { 2334 msg->msg_namelen = addr->len; 2335 memcpy(msg->msg_name, addr->name, addr->len); 2336 } 2337 } 2338 2339 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2340 int flags) 2341 { 2342 struct scm_cookie scm; 2343 struct socket *sock = sk->sk_socket; 2344 struct unix_sock *u = unix_sk(sk); 2345 struct sk_buff *skb, *last; 2346 long timeo; 2347 int skip; 2348 int err; 2349 2350 err = -EOPNOTSUPP; 2351 if (flags&MSG_OOB) 2352 goto out; 2353 2354 timeo = sock_rcvtimeo(sk, flags & 
MSG_DONTWAIT); 2355 2356 do { 2357 mutex_lock(&u->iolock); 2358 2359 skip = sk_peek_offset(sk, flags); 2360 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2361 &skip, &err, &last); 2362 if (skb) { 2363 if (!(flags & MSG_PEEK)) 2364 scm_stat_del(sk, skb); 2365 break; 2366 } 2367 2368 mutex_unlock(&u->iolock); 2369 2370 if (err != -EAGAIN) 2371 break; 2372 } while (timeo && 2373 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2374 &err, &timeo, last)); 2375 2376 if (!skb) { /* implies iolock unlocked */ 2377 unix_state_lock(sk); 2378 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2379 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2380 (sk->sk_shutdown & RCV_SHUTDOWN)) 2381 err = 0; 2382 unix_state_unlock(sk); 2383 goto out; 2384 } 2385 2386 if (wq_has_sleeper(&u->peer_wait)) 2387 wake_up_interruptible_sync_poll(&u->peer_wait, 2388 EPOLLOUT | EPOLLWRNORM | 2389 EPOLLWRBAND); 2390 2391 if (msg->msg_name) 2392 unix_copy_addr(msg, skb->sk); 2393 2394 if (size > skb->len - skip) 2395 size = skb->len - skip; 2396 else if (size < skb->len - skip) 2397 msg->msg_flags |= MSG_TRUNC; 2398 2399 err = skb_copy_datagram_msg(skb, skip, msg, size); 2400 if (err) 2401 goto out_free; 2402 2403 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2404 __sock_recv_timestamp(msg, sk, skb); 2405 2406 memset(&scm, 0, sizeof(scm)); 2407 2408 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2409 unix_set_secdata(&scm, skb); 2410 2411 if (!(flags & MSG_PEEK)) { 2412 if (UNIXCB(skb).fp) 2413 unix_detach_fds(&scm, skb); 2414 2415 sk_peek_offset_bwd(sk, skb->len); 2416 } else { 2417 /* It is questionable: on PEEK we could: 2418 - do not return fds - good, but too simple 8) 2419 - return fds, and do not return them on read (old strategy, 2420 apparently wrong) 2421 - clone fds (I chose it for now, it is the most universal 2422 solution) 2423 2424 POSIX 1003.1g does not actually define this clearly 2425 at all. 
POSIX 1003.1g doesn't define a lot of things 2426 clearly however! 2427 2428 */ 2429 2430 sk_peek_offset_fwd(sk, size); 2431 2432 if (UNIXCB(skb).fp) 2433 unix_peek_fds(&scm, skb); 2434 } 2435 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2436 2437 scm_recv_unix(sock, msg, &scm, flags); 2438 2439 out_free: 2440 skb_free_datagram(sk, skb); 2441 mutex_unlock(&u->iolock); 2442 out: 2443 return err; 2444 } 2445 2446 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2447 int flags) 2448 { 2449 struct sock *sk = sock->sk; 2450 2451 #ifdef CONFIG_BPF_SYSCALL 2452 const struct proto *prot = READ_ONCE(sk->sk_prot); 2453 2454 if (prot != &unix_dgram_proto) 2455 return prot->recvmsg(sk, msg, size, flags, NULL); 2456 #endif 2457 return __unix_dgram_recvmsg(sk, msg, size, flags); 2458 } 2459 2460 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2461 { 2462 struct unix_sock *u = unix_sk(sk); 2463 struct sk_buff *skb; 2464 int err; 2465 2466 mutex_lock(&u->iolock); 2467 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2468 mutex_unlock(&u->iolock); 2469 if (!skb) 2470 return err; 2471 2472 return recv_actor(sk, skb); 2473 } 2474 2475 /* 2476 * Sleep until more data has arrived. But check for races.. 
2477 */ 2478 static long unix_stream_data_wait(struct sock *sk, long timeo, 2479 struct sk_buff *last, unsigned int last_len, 2480 bool freezable) 2481 { 2482 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; 2483 struct sk_buff *tail; 2484 DEFINE_WAIT(wait); 2485 2486 unix_state_lock(sk); 2487 2488 for (;;) { 2489 prepare_to_wait(sk_sleep(sk), &wait, state); 2490 2491 tail = skb_peek_tail(&sk->sk_receive_queue); 2492 if (tail != last || 2493 (tail && tail->len != last_len) || 2494 sk->sk_err || 2495 (sk->sk_shutdown & RCV_SHUTDOWN) || 2496 signal_pending(current) || 2497 !timeo) 2498 break; 2499 2500 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2501 unix_state_unlock(sk); 2502 timeo = schedule_timeout(timeo); 2503 unix_state_lock(sk); 2504 2505 if (sock_flag(sk, SOCK_DEAD)) 2506 break; 2507 2508 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2509 } 2510 2511 finish_wait(sk_sleep(sk), &wait); 2512 unix_state_unlock(sk); 2513 return timeo; 2514 } 2515 2516 static unsigned int unix_skb_len(const struct sk_buff *skb) 2517 { 2518 return skb->len - UNIXCB(skb).consumed; 2519 } 2520 2521 struct unix_stream_read_state { 2522 int (*recv_actor)(struct sk_buff *, int, int, 2523 struct unix_stream_read_state *); 2524 struct socket *socket; 2525 struct msghdr *msg; 2526 struct pipe_inode_info *pipe; 2527 size_t size; 2528 int flags; 2529 unsigned int splice_flags; 2530 }; 2531 2532 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2533 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2534 { 2535 struct socket *sock = state->socket; 2536 struct sock *sk = sock->sk; 2537 struct unix_sock *u = unix_sk(sk); 2538 int chunk = 1; 2539 struct sk_buff *oob_skb; 2540 2541 mutex_lock(&u->iolock); 2542 unix_state_lock(sk); 2543 2544 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2545 unix_state_unlock(sk); 2546 mutex_unlock(&u->iolock); 2547 return -EINVAL; 2548 } 2549 2550 oob_skb = u->oob_skb; 2551 2552 if (!(state->flags & MSG_PEEK)) 2553 WRITE_ONCE(u->oob_skb, 
NULL); 2554 else 2555 skb_get(oob_skb); 2556 unix_state_unlock(sk); 2557 2558 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2559 2560 if (!(state->flags & MSG_PEEK)) 2561 UNIXCB(oob_skb).consumed += 1; 2562 2563 consume_skb(oob_skb); 2564 2565 mutex_unlock(&u->iolock); 2566 2567 if (chunk < 0) 2568 return -EFAULT; 2569 2570 state->msg->msg_flags |= MSG_OOB; 2571 return 1; 2572 } 2573 2574 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2575 int flags, int copied) 2576 { 2577 struct unix_sock *u = unix_sk(sk); 2578 2579 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2580 skb_unlink(skb, &sk->sk_receive_queue); 2581 consume_skb(skb); 2582 skb = NULL; 2583 } else { 2584 if (skb == u->oob_skb) { 2585 if (copied) { 2586 skb = NULL; 2587 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2588 if (!(flags & MSG_PEEK)) { 2589 WRITE_ONCE(u->oob_skb, NULL); 2590 consume_skb(skb); 2591 } 2592 } else if (!(flags & MSG_PEEK)) { 2593 skb_unlink(skb, &sk->sk_receive_queue); 2594 consume_skb(skb); 2595 skb = skb_peek(&sk->sk_receive_queue); 2596 } 2597 } 2598 } 2599 return skb; 2600 } 2601 #endif 2602 2603 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2604 { 2605 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2606 return -ENOTCONN; 2607 2608 return unix_read_skb(sk, recv_actor); 2609 } 2610 2611 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2612 bool freezable) 2613 { 2614 struct scm_cookie scm; 2615 struct socket *sock = state->socket; 2616 struct sock *sk = sock->sk; 2617 struct unix_sock *u = unix_sk(sk); 2618 int copied = 0; 2619 int flags = state->flags; 2620 int noblock = flags & MSG_DONTWAIT; 2621 bool check_creds = false; 2622 int target; 2623 int err = 0; 2624 long timeo; 2625 int skip; 2626 size_t size = state->size; 2627 unsigned int last_len; 2628 2629 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2630 err = -EINVAL; 2631 goto out; 2632 } 2633 2634 if (unlikely(flags & MSG_OOB)) { 
2635 err = -EOPNOTSUPP; 2636 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2637 err = unix_stream_recv_urg(state); 2638 #endif 2639 goto out; 2640 } 2641 2642 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2643 timeo = sock_rcvtimeo(sk, noblock); 2644 2645 memset(&scm, 0, sizeof(scm)); 2646 2647 /* Lock the socket to prevent queue disordering 2648 * while sleeps in memcpy_tomsg 2649 */ 2650 mutex_lock(&u->iolock); 2651 2652 skip = max(sk_peek_offset(sk, flags), 0); 2653 2654 do { 2655 int chunk; 2656 bool drop_skb; 2657 struct sk_buff *skb, *last; 2658 2659 redo: 2660 unix_state_lock(sk); 2661 if (sock_flag(sk, SOCK_DEAD)) { 2662 err = -ECONNRESET; 2663 goto unlock; 2664 } 2665 last = skb = skb_peek(&sk->sk_receive_queue); 2666 last_len = last ? last->len : 0; 2667 2668 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2669 if (skb) { 2670 skb = manage_oob(skb, sk, flags, copied); 2671 if (!skb) { 2672 unix_state_unlock(sk); 2673 if (copied) 2674 break; 2675 goto redo; 2676 } 2677 } 2678 #endif 2679 again: 2680 if (skb == NULL) { 2681 if (copied >= target) 2682 goto unlock; 2683 2684 /* 2685 * POSIX 1003.1g mandates this order. 
2686 */ 2687 2688 err = sock_error(sk); 2689 if (err) 2690 goto unlock; 2691 if (sk->sk_shutdown & RCV_SHUTDOWN) 2692 goto unlock; 2693 2694 unix_state_unlock(sk); 2695 if (!timeo) { 2696 err = -EAGAIN; 2697 break; 2698 } 2699 2700 mutex_unlock(&u->iolock); 2701 2702 timeo = unix_stream_data_wait(sk, timeo, last, 2703 last_len, freezable); 2704 2705 if (signal_pending(current)) { 2706 err = sock_intr_errno(timeo); 2707 scm_destroy(&scm); 2708 goto out; 2709 } 2710 2711 mutex_lock(&u->iolock); 2712 goto redo; 2713 unlock: 2714 unix_state_unlock(sk); 2715 break; 2716 } 2717 2718 while (skip >= unix_skb_len(skb)) { 2719 skip -= unix_skb_len(skb); 2720 last = skb; 2721 last_len = skb->len; 2722 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2723 if (!skb) 2724 goto again; 2725 } 2726 2727 unix_state_unlock(sk); 2728 2729 if (check_creds) { 2730 /* Never glue messages from different writers */ 2731 if (!unix_skb_scm_eq(skb, &scm)) 2732 break; 2733 } else if (test_bit(SOCK_PASSCRED, &sock->flags) || 2734 test_bit(SOCK_PASSPIDFD, &sock->flags)) { 2735 /* Copy credentials */ 2736 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2737 unix_set_secdata(&scm, skb); 2738 check_creds = true; 2739 } 2740 2741 /* Copy address just once */ 2742 if (state->msg && state->msg->msg_name) { 2743 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2744 state->msg->msg_name); 2745 unix_copy_addr(state->msg, skb->sk); 2746 sunaddr = NULL; 2747 } 2748 2749 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2750 skb_get(skb); 2751 chunk = state->recv_actor(skb, skip, chunk, state); 2752 drop_skb = !unix_skb_len(skb); 2753 /* skb is only safe to use if !drop_skb */ 2754 consume_skb(skb); 2755 if (chunk < 0) { 2756 if (copied == 0) 2757 copied = -EFAULT; 2758 break; 2759 } 2760 copied += chunk; 2761 size -= chunk; 2762 2763 if (drop_skb) { 2764 /* the skb was touched by a concurrent reader; 2765 * we should not expect anything from this skb 2766 * anymore and 
assume it invalid - we can be 2767 * sure it was dropped from the socket queue 2768 * 2769 * let's report a short read 2770 */ 2771 err = 0; 2772 break; 2773 } 2774 2775 /* Mark read part of skb as used */ 2776 if (!(flags & MSG_PEEK)) { 2777 UNIXCB(skb).consumed += chunk; 2778 2779 sk_peek_offset_bwd(sk, chunk); 2780 2781 if (UNIXCB(skb).fp) { 2782 scm_stat_del(sk, skb); 2783 unix_detach_fds(&scm, skb); 2784 } 2785 2786 if (unix_skb_len(skb)) 2787 break; 2788 2789 skb_unlink(skb, &sk->sk_receive_queue); 2790 consume_skb(skb); 2791 2792 if (scm.fp) 2793 break; 2794 } else { 2795 /* It is questionable, see note in unix_dgram_recvmsg. 2796 */ 2797 if (UNIXCB(skb).fp) 2798 unix_peek_fds(&scm, skb); 2799 2800 sk_peek_offset_fwd(sk, chunk); 2801 2802 if (UNIXCB(skb).fp) 2803 break; 2804 2805 skip = 0; 2806 last = skb; 2807 last_len = skb->len; 2808 unix_state_lock(sk); 2809 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2810 if (skb) 2811 goto again; 2812 unix_state_unlock(sk); 2813 break; 2814 } 2815 } while (size); 2816 2817 mutex_unlock(&u->iolock); 2818 if (state->msg) 2819 scm_recv_unix(sock, state->msg, &scm, flags); 2820 else 2821 scm_destroy(&scm); 2822 out: 2823 return copied ? 
: err; 2824 } 2825 2826 static int unix_stream_read_actor(struct sk_buff *skb, 2827 int skip, int chunk, 2828 struct unix_stream_read_state *state) 2829 { 2830 int ret; 2831 2832 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2833 state->msg, chunk); 2834 return ret ?: chunk; 2835 } 2836 2837 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2838 size_t size, int flags) 2839 { 2840 struct unix_stream_read_state state = { 2841 .recv_actor = unix_stream_read_actor, 2842 .socket = sk->sk_socket, 2843 .msg = msg, 2844 .size = size, 2845 .flags = flags 2846 }; 2847 2848 return unix_stream_read_generic(&state, true); 2849 } 2850 2851 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2852 size_t size, int flags) 2853 { 2854 struct unix_stream_read_state state = { 2855 .recv_actor = unix_stream_read_actor, 2856 .socket = sock, 2857 .msg = msg, 2858 .size = size, 2859 .flags = flags 2860 }; 2861 2862 #ifdef CONFIG_BPF_SYSCALL 2863 struct sock *sk = sock->sk; 2864 const struct proto *prot = READ_ONCE(sk->sk_prot); 2865 2866 if (prot != &unix_stream_proto) 2867 return prot->recvmsg(sk, msg, size, flags, NULL); 2868 #endif 2869 return unix_stream_read_generic(&state, true); 2870 } 2871 2872 static int unix_stream_splice_actor(struct sk_buff *skb, 2873 int skip, int chunk, 2874 struct unix_stream_read_state *state) 2875 { 2876 return skb_splice_bits(skb, state->socket->sk, 2877 UNIXCB(skb).consumed + skip, 2878 state->pipe, chunk, state->splice_flags); 2879 } 2880 2881 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2882 struct pipe_inode_info *pipe, 2883 size_t size, unsigned int flags) 2884 { 2885 struct unix_stream_read_state state = { 2886 .recv_actor = unix_stream_splice_actor, 2887 .socket = sock, 2888 .pipe = pipe, 2889 .size = size, 2890 .splice_flags = flags, 2891 }; 2892 2893 if (unlikely(*ppos)) 2894 return -ESPIPE; 2895 2896 if (sock->file->f_flags & O_NONBLOCK || 2897 flags & 
SPLICE_F_NONBLOCK) 2898 state.flags = MSG_DONTWAIT; 2899 2900 return unix_stream_read_generic(&state, false); 2901 } 2902 2903 static int unix_shutdown(struct socket *sock, int mode) 2904 { 2905 struct sock *sk = sock->sk; 2906 struct sock *other; 2907 2908 if (mode < SHUT_RD || mode > SHUT_RDWR) 2909 return -EINVAL; 2910 /* This maps: 2911 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2912 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 2913 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2914 */ 2915 ++mode; 2916 2917 unix_state_lock(sk); 2918 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); 2919 other = unix_peer(sk); 2920 if (other) 2921 sock_hold(other); 2922 unix_state_unlock(sk); 2923 sk->sk_state_change(sk); 2924 2925 if (other && 2926 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2927 2928 int peer_mode = 0; 2929 const struct proto *prot = READ_ONCE(other->sk_prot); 2930 2931 if (prot->unhash) 2932 prot->unhash(other); 2933 if (mode&RCV_SHUTDOWN) 2934 peer_mode |= SEND_SHUTDOWN; 2935 if (mode&SEND_SHUTDOWN) 2936 peer_mode |= RCV_SHUTDOWN; 2937 unix_state_lock(other); 2938 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); 2939 unix_state_unlock(other); 2940 other->sk_state_change(other); 2941 if (peer_mode == SHUTDOWN_MASK) 2942 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2943 else if (peer_mode & RCV_SHUTDOWN) 2944 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2945 } 2946 if (other) 2947 sock_put(other); 2948 2949 return 0; 2950 } 2951 2952 long unix_inq_len(struct sock *sk) 2953 { 2954 struct sk_buff *skb; 2955 long amount = 0; 2956 2957 if (sk->sk_state == TCP_LISTEN) 2958 return -EINVAL; 2959 2960 spin_lock(&sk->sk_receive_queue.lock); 2961 if (sk->sk_type == SOCK_STREAM || 2962 sk->sk_type == SOCK_SEQPACKET) { 2963 skb_queue_walk(&sk->sk_receive_queue, skb) 2964 amount += unix_skb_len(skb); 2965 } else { 2966 skb = skb_peek(&sk->sk_receive_queue); 2967 if (skb) 2968 amount = skb->len; 2969 } 2970 spin_unlock(&sk->sk_receive_queue.lock); 2971 
2972 return amount; 2973 } 2974 EXPORT_SYMBOL_GPL(unix_inq_len); 2975 2976 long unix_outq_len(struct sock *sk) 2977 { 2978 return sk_wmem_alloc_get(sk); 2979 } 2980 EXPORT_SYMBOL_GPL(unix_outq_len); 2981 2982 static int unix_open_file(struct sock *sk) 2983 { 2984 struct path path; 2985 struct file *f; 2986 int fd; 2987 2988 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2989 return -EPERM; 2990 2991 if (!smp_load_acquire(&unix_sk(sk)->addr)) 2992 return -ENOENT; 2993 2994 path = unix_sk(sk)->path; 2995 if (!path.dentry) 2996 return -ENOENT; 2997 2998 path_get(&path); 2999 3000 fd = get_unused_fd_flags(O_CLOEXEC); 3001 if (fd < 0) 3002 goto out; 3003 3004 f = dentry_open(&path, O_PATH, current_cred()); 3005 if (IS_ERR(f)) { 3006 put_unused_fd(fd); 3007 fd = PTR_ERR(f); 3008 goto out; 3009 } 3010 3011 fd_install(fd, f); 3012 out: 3013 path_put(&path); 3014 3015 return fd; 3016 } 3017 3018 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3019 { 3020 struct sock *sk = sock->sk; 3021 long amount = 0; 3022 int err; 3023 3024 switch (cmd) { 3025 case SIOCOUTQ: 3026 amount = unix_outq_len(sk); 3027 err = put_user(amount, (int __user *)arg); 3028 break; 3029 case SIOCINQ: 3030 amount = unix_inq_len(sk); 3031 if (amount < 0) 3032 err = amount; 3033 else 3034 err = put_user(amount, (int __user *)arg); 3035 break; 3036 case SIOCUNIXFILE: 3037 err = unix_open_file(sk); 3038 break; 3039 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3040 case SIOCATMARK: 3041 { 3042 struct sk_buff *skb; 3043 int answ = 0; 3044 3045 skb = skb_peek(&sk->sk_receive_queue); 3046 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3047 answ = 1; 3048 err = put_user(answ, (int __user *)arg); 3049 } 3050 break; 3051 #endif 3052 default: 3053 err = -ENOIOCTLCMD; 3054 break; 3055 } 3056 return err; 3057 } 3058 3059 #ifdef CONFIG_COMPAT 3060 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3061 { 3062 return unix_ioctl(sock, cmd, (unsigned 
long)compat_ptr(arg)); 3063 } 3064 #endif 3065 3066 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3067 { 3068 struct sock *sk = sock->sk; 3069 __poll_t mask; 3070 u8 shutdown; 3071 3072 sock_poll_wait(file, sock, wait); 3073 mask = 0; 3074 shutdown = READ_ONCE(sk->sk_shutdown); 3075 3076 /* exceptional events? */ 3077 if (READ_ONCE(sk->sk_err)) 3078 mask |= EPOLLERR; 3079 if (shutdown == SHUTDOWN_MASK) 3080 mask |= EPOLLHUP; 3081 if (shutdown & RCV_SHUTDOWN) 3082 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3083 3084 /* readable? */ 3085 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3086 mask |= EPOLLIN | EPOLLRDNORM; 3087 if (sk_is_readable(sk)) 3088 mask |= EPOLLIN | EPOLLRDNORM; 3089 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3090 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3091 mask |= EPOLLPRI; 3092 #endif 3093 3094 /* Connection-based need to check for termination and startup */ 3095 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3096 sk->sk_state == TCP_CLOSE) 3097 mask |= EPOLLHUP; 3098 3099 /* 3100 * we set writable also when the other side has shut down the 3101 * connection. This prevents stuck sockets. 3102 */ 3103 if (unix_writable(sk)) 3104 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3105 3106 return mask; 3107 } 3108 3109 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3110 poll_table *wait) 3111 { 3112 struct sock *sk = sock->sk, *other; 3113 unsigned int writable; 3114 __poll_t mask; 3115 u8 shutdown; 3116 3117 sock_poll_wait(file, sock, wait); 3118 mask = 0; 3119 shutdown = READ_ONCE(sk->sk_shutdown); 3120 3121 /* exceptional events? */ 3122 if (READ_ONCE(sk->sk_err) || 3123 !skb_queue_empty_lockless(&sk->sk_error_queue)) 3124 mask |= EPOLLERR | 3125 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3126 3127 if (shutdown & RCV_SHUTDOWN) 3128 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3129 if (shutdown == SHUTDOWN_MASK) 3130 mask |= EPOLLHUP; 3131 3132 /* readable? 
*/ 3133 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3134 mask |= EPOLLIN | EPOLLRDNORM; 3135 if (sk_is_readable(sk)) 3136 mask |= EPOLLIN | EPOLLRDNORM; 3137 3138 /* Connection-based need to check for termination and startup */ 3139 if (sk->sk_type == SOCK_SEQPACKET) { 3140 if (sk->sk_state == TCP_CLOSE) 3141 mask |= EPOLLHUP; 3142 /* connection hasn't started yet? */ 3143 if (sk->sk_state == TCP_SYN_SENT) 3144 return mask; 3145 } 3146 3147 /* No write status requested, avoid expensive OUT tests. */ 3148 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3149 return mask; 3150 3151 writable = unix_writable(sk); 3152 if (writable) { 3153 unix_state_lock(sk); 3154 3155 other = unix_peer(sk); 3156 if (other && unix_peer(other) != sk && 3157 unix_recvq_full_lockless(other) && 3158 unix_dgram_peer_wake_me(sk, other)) 3159 writable = 0; 3160 3161 unix_state_unlock(sk); 3162 } 3163 3164 if (writable) 3165 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3166 else 3167 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3168 3169 return mask; 3170 } 3171 3172 #ifdef CONFIG_PROC_FS 3173 3174 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3175 3176 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3177 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3178 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3179 3180 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3181 { 3182 unsigned long offset = get_offset(*pos); 3183 unsigned long bucket = get_bucket(*pos); 3184 unsigned long count = 0; 3185 struct sock *sk; 3186 3187 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3188 sk; sk = sk_next(sk)) { 3189 if (++count == offset) 3190 break; 3191 } 3192 3193 return sk; 3194 } 3195 3196 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3197 { 3198 unsigned long bucket = get_bucket(*pos); 3199 struct net *net = seq_file_net(seq); 3200 struct sock *sk; 3201 3202 while (bucket < 
UNIX_HASH_SIZE) { 3203 spin_lock(&net->unx.table.locks[bucket]); 3204 3205 sk = unix_from_bucket(seq, pos); 3206 if (sk) 3207 return sk; 3208 3209 spin_unlock(&net->unx.table.locks[bucket]); 3210 3211 *pos = set_bucket_offset(++bucket, 1); 3212 } 3213 3214 return NULL; 3215 } 3216 3217 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3218 loff_t *pos) 3219 { 3220 unsigned long bucket = get_bucket(*pos); 3221 3222 sk = sk_next(sk); 3223 if (sk) 3224 return sk; 3225 3226 3227 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3228 3229 *pos = set_bucket_offset(++bucket, 1); 3230 3231 return unix_get_first(seq, pos); 3232 } 3233 3234 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3235 { 3236 if (!*pos) 3237 return SEQ_START_TOKEN; 3238 3239 return unix_get_first(seq, pos); 3240 } 3241 3242 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3243 { 3244 ++*pos; 3245 3246 if (v == SEQ_START_TOKEN) 3247 return unix_get_first(seq, pos); 3248 3249 return unix_get_next(seq, v, pos); 3250 } 3251 3252 static void unix_seq_stop(struct seq_file *seq, void *v) 3253 { 3254 struct sock *sk = v; 3255 3256 if (sk) 3257 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3258 } 3259 3260 static int unix_seq_show(struct seq_file *seq, void *v) 3261 { 3262 3263 if (v == SEQ_START_TOKEN) 3264 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3265 "Inode Path\n"); 3266 else { 3267 struct sock *s = v; 3268 struct unix_sock *u = unix_sk(s); 3269 unix_state_lock(s); 3270 3271 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3272 s, 3273 refcount_read(&s->sk_refcnt), 3274 0, 3275 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3276 s->sk_type, 3277 s->sk_socket ? 3278 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3279 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3280 sock_i_ino(s)); 3281 3282 if (u->addr) { // under a hash table lock here 3283 int i, len; 3284 seq_putc(seq, ' '); 3285 3286 i = 0; 3287 len = u->addr->len - 3288 offsetof(struct sockaddr_un, sun_path); 3289 if (u->addr->name->sun_path[0]) { 3290 len--; 3291 } else { 3292 seq_putc(seq, '@'); 3293 i++; 3294 } 3295 for ( ; i < len; i++) 3296 seq_putc(seq, u->addr->name->sun_path[i] ?: 3297 '@'); 3298 } 3299 unix_state_unlock(s); 3300 seq_putc(seq, '\n'); 3301 } 3302 3303 return 0; 3304 } 3305 3306 static const struct seq_operations unix_seq_ops = { 3307 .start = unix_seq_start, 3308 .next = unix_seq_next, 3309 .stop = unix_seq_stop, 3310 .show = unix_seq_show, 3311 }; 3312 3313 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3314 struct bpf_unix_iter_state { 3315 struct seq_net_private p; 3316 unsigned int cur_sk; 3317 unsigned int end_sk; 3318 unsigned int max_sk; 3319 struct sock **batch; 3320 bool st_bucket_done; 3321 }; 3322 3323 struct bpf_iter__unix { 3324 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3325 __bpf_md_ptr(struct unix_sock *, unix_sk); 3326 uid_t uid __aligned(8); 3327 }; 3328 3329 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3330 struct unix_sock *unix_sk, uid_t uid) 3331 { 3332 struct bpf_iter__unix ctx; 3333 3334 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3335 ctx.meta = meta; 3336 ctx.unix_sk = unix_sk; 3337 ctx.uid = uid; 3338 return bpf_iter_run_prog(prog, &ctx); 3339 } 3340 3341 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3342 3343 { 3344 struct bpf_unix_iter_state *iter = seq->private; 3345 unsigned int expected = 1; 3346 struct sock *sk; 3347 3348 sock_hold(start_sk); 3349 iter->batch[iter->end_sk++] = start_sk; 3350 3351 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3352 if (iter->end_sk < iter->max_sk) { 3353 sock_hold(sk); 3354 iter->batch[iter->end_sk++] = sk; 3355 } 3356 3357 expected++; 3358 } 3359 3360 
spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3361 3362 return expected; 3363 } 3364 3365 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3366 { 3367 while (iter->cur_sk < iter->end_sk) 3368 sock_put(iter->batch[iter->cur_sk++]); 3369 } 3370 3371 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3372 unsigned int new_batch_sz) 3373 { 3374 struct sock **new_batch; 3375 3376 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3377 GFP_USER | __GFP_NOWARN); 3378 if (!new_batch) 3379 return -ENOMEM; 3380 3381 bpf_iter_unix_put_batch(iter); 3382 kvfree(iter->batch); 3383 iter->batch = new_batch; 3384 iter->max_sk = new_batch_sz; 3385 3386 return 0; 3387 } 3388 3389 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3390 loff_t *pos) 3391 { 3392 struct bpf_unix_iter_state *iter = seq->private; 3393 unsigned int expected; 3394 bool resized = false; 3395 struct sock *sk; 3396 3397 if (iter->st_bucket_done) 3398 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3399 3400 again: 3401 /* Get a new batch */ 3402 iter->cur_sk = 0; 3403 iter->end_sk = 0; 3404 3405 sk = unix_get_first(seq, pos); 3406 if (!sk) 3407 return NULL; /* Done */ 3408 3409 expected = bpf_iter_unix_hold_batch(seq, sk); 3410 3411 if (iter->end_sk == expected) { 3412 iter->st_bucket_done = true; 3413 return sk; 3414 } 3415 3416 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3417 resized = true; 3418 goto again; 3419 } 3420 3421 return sk; 3422 } 3423 3424 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3425 { 3426 if (!*pos) 3427 return SEQ_START_TOKEN; 3428 3429 /* bpf iter does not support lseek, so it always 3430 * continue from where it was stop()-ped. 
3431 */ 3432 return bpf_iter_unix_batch(seq, pos); 3433 } 3434 3435 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3436 { 3437 struct bpf_unix_iter_state *iter = seq->private; 3438 struct sock *sk; 3439 3440 /* Whenever seq_next() is called, the iter->cur_sk is 3441 * done with seq_show(), so advance to the next sk in 3442 * the batch. 3443 */ 3444 if (iter->cur_sk < iter->end_sk) 3445 sock_put(iter->batch[iter->cur_sk++]); 3446 3447 ++*pos; 3448 3449 if (iter->cur_sk < iter->end_sk) 3450 sk = iter->batch[iter->cur_sk]; 3451 else 3452 sk = bpf_iter_unix_batch(seq, pos); 3453 3454 return sk; 3455 } 3456 3457 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3458 { 3459 struct bpf_iter_meta meta; 3460 struct bpf_prog *prog; 3461 struct sock *sk = v; 3462 uid_t uid; 3463 bool slow; 3464 int ret; 3465 3466 if (v == SEQ_START_TOKEN) 3467 return 0; 3468 3469 slow = lock_sock_fast(sk); 3470 3471 if (unlikely(sk_unhashed(sk))) { 3472 ret = SEQ_SKIP; 3473 goto unlock; 3474 } 3475 3476 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3477 meta.seq = seq; 3478 prog = bpf_iter_get_info(&meta, false); 3479 ret = unix_prog_seq_show(prog, &meta, v, uid); 3480 unlock: 3481 unlock_sock_fast(sk, slow); 3482 return ret; 3483 } 3484 3485 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3486 { 3487 struct bpf_unix_iter_state *iter = seq->private; 3488 struct bpf_iter_meta meta; 3489 struct bpf_prog *prog; 3490 3491 if (!v) { 3492 meta.seq = seq; 3493 prog = bpf_iter_get_info(&meta, true); 3494 if (prog) 3495 (void)unix_prog_seq_show(prog, &meta, v, 0); 3496 } 3497 3498 if (iter->cur_sk < iter->end_sk) 3499 bpf_iter_unix_put_batch(iter); 3500 } 3501 3502 static const struct seq_operations bpf_iter_unix_seq_ops = { 3503 .start = bpf_iter_unix_seq_start, 3504 .next = bpf_iter_unix_seq_next, 3505 .stop = bpf_iter_unix_seq_stop, 3506 .show = bpf_iter_unix_seq_show, 3507 }; 3508 #endif 3509 #endif 3510 3511 static 
const struct net_proto_family unix_family_ops = { 3512 .family = PF_UNIX, 3513 .create = unix_create, 3514 .owner = THIS_MODULE, 3515 }; 3516 3517 3518 static int __net_init unix_net_init(struct net *net) 3519 { 3520 int i; 3521 3522 net->unx.sysctl_max_dgram_qlen = 10; 3523 if (unix_sysctl_register(net)) 3524 goto out; 3525 3526 #ifdef CONFIG_PROC_FS 3527 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3528 sizeof(struct seq_net_private))) 3529 goto err_sysctl; 3530 #endif 3531 3532 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3533 sizeof(spinlock_t), GFP_KERNEL); 3534 if (!net->unx.table.locks) 3535 goto err_proc; 3536 3537 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3538 sizeof(struct hlist_head), 3539 GFP_KERNEL); 3540 if (!net->unx.table.buckets) 3541 goto free_locks; 3542 3543 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3544 spin_lock_init(&net->unx.table.locks[i]); 3545 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3546 } 3547 3548 return 0; 3549 3550 free_locks: 3551 kvfree(net->unx.table.locks); 3552 err_proc: 3553 #ifdef CONFIG_PROC_FS 3554 remove_proc_entry("unix", net->proc_net); 3555 err_sysctl: 3556 #endif 3557 unix_sysctl_unregister(net); 3558 out: 3559 return -ENOMEM; 3560 } 3561 3562 static void __net_exit unix_net_exit(struct net *net) 3563 { 3564 kvfree(net->unx.table.buckets); 3565 kvfree(net->unx.table.locks); 3566 unix_sysctl_unregister(net); 3567 remove_proc_entry("unix", net->proc_net); 3568 } 3569 3570 static struct pernet_operations unix_net_ops = { 3571 .init = unix_net_init, 3572 .exit = unix_net_exit, 3573 }; 3574 3575 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3576 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3577 struct unix_sock *unix_sk, uid_t uid) 3578 3579 #define INIT_BATCH_SZ 16 3580 3581 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3582 { 3583 struct bpf_unix_iter_state *iter = priv_data; 3584 int err; 3585 3586 
err = bpf_iter_init_seq_net(priv_data, aux); 3587 if (err) 3588 return err; 3589 3590 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3591 if (err) { 3592 bpf_iter_fini_seq_net(priv_data); 3593 return err; 3594 } 3595 3596 return 0; 3597 } 3598 3599 static void bpf_iter_fini_unix(void *priv_data) 3600 { 3601 struct bpf_unix_iter_state *iter = priv_data; 3602 3603 bpf_iter_fini_seq_net(priv_data); 3604 kvfree(iter->batch); 3605 } 3606 3607 static const struct bpf_iter_seq_info unix_seq_info = { 3608 .seq_ops = &bpf_iter_unix_seq_ops, 3609 .init_seq_private = bpf_iter_init_unix, 3610 .fini_seq_private = bpf_iter_fini_unix, 3611 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3612 }; 3613 3614 static const struct bpf_func_proto * 3615 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3616 const struct bpf_prog *prog) 3617 { 3618 switch (func_id) { 3619 case BPF_FUNC_setsockopt: 3620 return &bpf_sk_setsockopt_proto; 3621 case BPF_FUNC_getsockopt: 3622 return &bpf_sk_getsockopt_proto; 3623 default: 3624 return NULL; 3625 } 3626 } 3627 3628 static struct bpf_iter_reg unix_reg_info = { 3629 .target = "unix", 3630 .ctx_arg_info_size = 1, 3631 .ctx_arg_info = { 3632 { offsetof(struct bpf_iter__unix, unix_sk), 3633 PTR_TO_BTF_ID_OR_NULL }, 3634 }, 3635 .get_func_proto = bpf_iter_unix_get_func_proto, 3636 .seq_info = &unix_seq_info, 3637 }; 3638 3639 static void __init bpf_iter_register(void) 3640 { 3641 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3642 if (bpf_iter_reg_target(&unix_reg_info)) 3643 pr_warn("Warning: could not register bpf iterator unix\n"); 3644 } 3645 #endif 3646 3647 static int __init af_unix_init(void) 3648 { 3649 int i, rc = -1; 3650 3651 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3652 3653 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3654 spin_lock_init(&bsd_socket_locks[i]); 3655 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3656 } 3657 3658 rc = 
proto_register(&unix_dgram_proto, 1); 3659 if (rc != 0) { 3660 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3661 goto out; 3662 } 3663 3664 rc = proto_register(&unix_stream_proto, 1); 3665 if (rc != 0) { 3666 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3667 proto_unregister(&unix_dgram_proto); 3668 goto out; 3669 } 3670 3671 sock_register(&unix_family_ops); 3672 register_pernet_subsys(&unix_net_ops); 3673 unix_bpf_build_proto(); 3674 3675 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3676 bpf_iter_register(); 3677 #endif 3678 3679 out: 3680 return rc; 3681 } 3682 3683 static void __exit af_unix_exit(void) 3684 { 3685 sock_unregister(PF_UNIX); 3686 proto_unregister(&unix_dgram_proto); 3687 proto_unregister(&unix_stream_proto); 3688 unregister_pernet_subsys(&unix_net_ops); 3689 } 3690 3691 /* Earlier than device_initcall() so that other drivers invoking 3692 request_module() don't end up in a loop when modprobe tries 3693 to use a UNIX socket. But later than subsys_initcall() because 3694 we depend on stuff initialised there */ 3695 fs_initcall(af_unix_init); 3696 module_exit(af_unix_exit); 3697 3698 MODULE_LICENSE("GPL"); 3699 MODULE_ALIAS_NETPROTO(PF_UNIX); 3700