// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	    Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	    Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid the huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *	    Artur Skawina	:	Hash function optimizations
 *	    Alexey Kuznetsov	:	Full scale SMP. Lot of bugs are introduced 8)
 *	    Malcolm Beattie	:	Set peercred for socketpair
 *	    Michal Ostrowski	:	Module initialization cleanup.
 *	    Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
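/* Illustrative userspace sketch of the abstract name space described above
 * (not kernel code): the address starts with a NUL byte, and the name is the
 * remaining bytes of the passed-in length, with no terminating NUL. The name
 * "demo" below is just an example.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	a.sun_path[0] = '\0';			// abstract-namespace marker
 *	memcpy(a.sun_path + 1, "demo", 4);	// name bytes, not NUL terminated
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */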
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/splice.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}

static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
	short offset = offsetof(struct sockaddr_storage, __data);

	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));

	/* This may look like an off by one error but it is a bit more
	 * subtle. 108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist. However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108. Also, we must terminate sun_path for strlen() in
	 * getname_kernel().
	 */
	addr->__data[addr_len - offset] = 0;

	/* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
	 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
	 * know the actual buffer.
	 */
	return strlen(addr->__data) + offset + 1;
}
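/* Worked example for unix_mkname_bsd() above (path "/tmp/x" is hypothetical):
 * a caller binding to "/tmp/x" may pass
 * addr_len = offsetof(struct sockaddr_un, sun_path) + 6 with no trailing NUL.
 * The function stores a 0 at __data[6] and returns
 * offsetof(struct sockaddr_un, sun_path) + strlen("/tmp/x") + 1, so the
 * recorded address always carries a NUL-terminated sun_path and a length
 * that includes that terminator.
 */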
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
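/* A socket counts as writable below while it is not listening and no more
 * than a quarter of its send buffer is consumed by queued skbs (hence the
 * shift by two); unix_write_space() re-checks this condition when write
 * memory is released.
 */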
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer.
 */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal the error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			WRITE_ONCE(other->sk_err, ECONNRESET);
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What the above comment does talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();	/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!READ_ONCE(u->addr))
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	WRITE_ONCE(sk->sk_peek_off, val);
	mutex_unlock(&u->iolock);

	return 0;
}

#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
	if (level == SOL_SOCKET) {
		switch (optname) {
		case SO_PEERPIDFD:
			return true;
		default:
			return false;
		}
	}

	return false;
}

struct proto unix_dgram_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
	.name			= "UNIX-STREAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.unhash			= unix_unhash,
	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct unix_sock *u;
	struct sock *sk;
	int err;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
		err = -ENFILE;
		goto err;
	}

	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /* dgram and seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk) {
		err = -ENOMEM;
		goto err;
	}

	sock_init_data(sock, sk);

	sk->sk_hash		= unix_unbound_hash(sk);
	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->inflight = 0;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_unbound_socket(net, sk);

	sock_prot_inuse_add(net, sk->sk_prot, 1);

	return sk;

err:
	atomic_long_dec(&unix_nr_socks);
	return ERR_PTR(err);
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}

static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}

static int unix_autobind(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	old_hash = sk->sk_hash;
	ordernum = get_random_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/* __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seem to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
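/* Binding to a filesystem name below creates the socket inode with
 * vfs_mknod() under the parent directory, so an already existing path makes
 * bind() fail; the -EEXIST from the VFS is reported to userspace as
 * -EADDRINUSE at the end of the function.
 */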
static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct mnt_idmap *idmap;
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	addr_len = unix_mkname_bsd(sunaddr, addr_len);
	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	idmap = mnt_idmap(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out_path;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	old_hash = sk->sk_hash;
	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod?  unlink what we'd created... */
	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
	done_path_create(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}

static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
			      int addr_len)
{
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	int err;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out;

	if (u->addr) {
		err = -EINVAL;
		goto out_mutex;
	}

	old_hash = sk->sk_hash;
	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
		goto out_spin;

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	mutex_unlock(&u->bindlock);
	return 0;

out_spin:
	unix_table_double_unlock(net, old_hash, new_hash);
	err = -EADDRINUSE;
out_mutex:
	mutex_unlock(&u->bindlock);
out:
	unix_release_addr(addr);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 > sk2)
		swap(sk1, sk2);

	unix_state_lock(sk1);
	unix_state_lock_nested(sk2, U_LOCK_SECOND);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
		    !READ_ONCE(unix_sk(sk)->addr)) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry.
		 */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			sk->sk_state = TCP_CLOSE;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	long timeo;
	int err;
	int st;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
	    !READ_ONCE(u->addr)) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	 * If we do it after the state is locked,
	 * we will have to recheck all again in any case.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry.
	 */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.
	 *
	 * It is a tricky place. We need to grab our state lock and cannot
	 * drop lock on peer. It is dangerous because deadlock is
	 * possible. Connect to self case and simultaneous
	 * attempt to connect are eliminated by checking socket
	 * state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	 * check this before attempt to grab lock.
	 *
	 * Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock. Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	ska->sk_state = TCP_ESTABLISHED;
	skb->sk_state = TCP_ESTABLISHED;
	socka->state  = SS_CONNECTED;
	sockb->state  = SS_CONNECTED;
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSPIDFD, &old->flags))
		set_bit(SOCK_PASSPIDFD, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
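/* For SOCK_STREAM/SOCK_SEQPACKET, unix_stream_connect() above parks the
 * freshly created "embryo" socket as an skb on the listener's receive queue;
 * unix_accept() below simply dequeues one such skb and grafts skb->sk onto
 * the new struct socket handed in by the accept(2) caller.
 */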
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				&err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}

static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = offsetof(struct sockaddr_un, sun_path);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeueing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 *     which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}
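/* Illustrative userspace sketch (not kernel code) of the fd passing that
 * unix_attach_fds()/unix_peek_fds() implement on the kernel side: the sender
 * attaches descriptors as an SCM_RIGHTS control message. The names
 * unix_sock_fd (a connected AF_UNIX socket) and fd_to_pass (any open fd)
 * are placeholders.
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
 *
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type  = SCM_RIGHTS;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *	sendmsg(unix_sock_fd, &mh, 0);
 */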
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *sk = sock->sk, *other = NULL;
	struct unix_sock *u = unix_sk(sk);
	struct scm_cookie scm;
	struct sk_buff *skb;
	int data_len = 0;
	int sk_locked;
	long timeo;
	int err;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
	    !READ_ONCE(u->addr)) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (sk->sk_type == SOCK_SEQPACKET) {
			/* We are here only when racing with unix_release_sock(),
			 * which is clearing @other. Never change state to
			 * TCP_CLOSE unlike SOCK_DGRAM wants.
2017 */ 2018 unix_state_unlock(sk); 2019 err = -EPIPE; 2020 } else if (unix_peer(sk) == other) { 2021 unix_peer(sk) = NULL; 2022 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2023 2024 sk->sk_state = TCP_CLOSE; 2025 unix_state_unlock(sk); 2026 2027 unix_dgram_disconnected(sk, other); 2028 sock_put(other); 2029 err = -ECONNREFUSED; 2030 } else { 2031 unix_state_unlock(sk); 2032 } 2033 2034 other = NULL; 2035 if (err) 2036 goto out_free; 2037 goto restart; 2038 } 2039 2040 err = -EPIPE; 2041 if (other->sk_shutdown & RCV_SHUTDOWN) 2042 goto out_unlock; 2043 2044 if (sk->sk_type != SOCK_SEQPACKET) { 2045 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2046 if (err) 2047 goto out_unlock; 2048 } 2049 2050 /* other == sk && unix_peer(other) != sk if 2051 * - unix_peer(sk) == NULL, destination address bound to sk 2052 * - unix_peer(sk) == sk by time of get but disconnected before lock 2053 */ 2054 if (other != sk && 2055 unlikely(unix_peer(other) != sk && 2056 unix_recvq_full_lockless(other))) { 2057 if (timeo) { 2058 timeo = unix_wait_for_peer(other, timeo); 2059 2060 err = sock_intr_errno(timeo); 2061 if (signal_pending(current)) 2062 goto out_free; 2063 2064 goto restart; 2065 } 2066 2067 if (!sk_locked) { 2068 unix_state_unlock(other); 2069 unix_state_double_lock(sk, other); 2070 } 2071 2072 if (unix_peer(sk) != other || 2073 unix_dgram_peer_wake_me(sk, other)) { 2074 err = -EAGAIN; 2075 sk_locked = 1; 2076 goto out_unlock; 2077 } 2078 2079 if (!sk_locked) { 2080 sk_locked = 1; 2081 goto restart_locked; 2082 } 2083 } 2084 2085 if (unlikely(sk_locked)) 2086 unix_state_unlock(sk); 2087 2088 if (sock_flag(other, SOCK_RCVTSTAMP)) 2089 __net_timestamp(skb); 2090 maybe_add_creds(skb, sock, other); 2091 scm_stat_add(other, skb); 2092 skb_queue_tail(&other->sk_receive_queue, skb); 2093 unix_state_unlock(other); 2094 other->sk_data_ready(other); 2095 sock_put(other); 2096 scm_destroy(&scm); 2097 return len; 2098 2099 out_unlock: 2100 if (sk_locked) 2101 unix_state_unlock(sk); 2102 unix_state_unlock(other); 2103 out_free: 2104 kfree_skb(skb); 2105 out: 2106 if (other) 2107 sock_put(other); 2108 scm_destroy(&scm); 2109 return err; 2110 } 2111 2112 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2113 * bytes, and a minimum of a full page. 
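 *
 * For example, with 4 KiB pages get_order(32768) is 3, so the define below
 * evaluates to 32768; with 64 KiB pages get_order(32768) is 0 and the limit
 * becomes one full 65536-byte page.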
2114 */ 2115 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2116 2117 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2118 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, 2119 struct scm_cookie *scm, bool fds_sent) 2120 { 2121 struct unix_sock *ousk = unix_sk(other); 2122 struct sk_buff *skb; 2123 int err = 0; 2124 2125 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2126 2127 if (!skb) 2128 return err; 2129 2130 err = unix_scm_to_skb(scm, skb, !fds_sent); 2131 if (err < 0) { 2132 kfree_skb(skb); 2133 return err; 2134 } 2135 skb_put(skb, 1); 2136 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2137 2138 if (err) { 2139 kfree_skb(skb); 2140 return err; 2141 } 2142 2143 unix_state_lock(other); 2144 2145 if (sock_flag(other, SOCK_DEAD) || 2146 (other->sk_shutdown & RCV_SHUTDOWN)) { 2147 unix_state_unlock(other); 2148 kfree_skb(skb); 2149 return -EPIPE; 2150 } 2151 2152 maybe_add_creds(skb, sock, other); 2153 skb_get(skb); 2154 2155 scm_stat_add(other, skb); 2156 2157 spin_lock(&other->sk_receive_queue.lock); 2158 if (ousk->oob_skb) 2159 consume_skb(ousk->oob_skb); 2160 WRITE_ONCE(ousk->oob_skb, skb); 2161 __skb_queue_tail(&other->sk_receive_queue, skb); 2162 spin_unlock(&other->sk_receive_queue.lock); 2163 2164 sk_send_sigurg(other); 2165 unix_state_unlock(other); 2166 other->sk_data_ready(other); 2167 2168 return err; 2169 } 2170 #endif 2171 2172 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2173 size_t len) 2174 { 2175 struct sock *sk = sock->sk; 2176 struct sock *other = NULL; 2177 int err, size; 2178 struct sk_buff *skb; 2179 int sent = 0; 2180 struct scm_cookie scm; 2181 bool fds_sent = false; 2182 int data_len; 2183 2184 wait_for_unix_gc(); 2185 err = scm_send(sock, msg, &scm, false); 2186 if (err < 0) 2187 return err; 2188 2189 err = -EOPNOTSUPP; 2190 if (msg->msg_flags & MSG_OOB) { 2191 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2192 if (len) 2193 len--; 2194 else 2195 #endif 2196 goto out_err; 2197 } 2198 2199 if (msg->msg_namelen) { 2200 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2201 goto out_err; 2202 } else { 2203 err = -ENOTCONN; 2204 other = unix_peer(sk); 2205 if (!other) 2206 goto out_err; 2207 } 2208 2209 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) 2210 goto pipe_err; 2211 2212 while (sent < len) { 2213 size = len - sent; 2214 2215 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2216 skb = sock_alloc_send_pskb(sk, 0, 0, 2217 msg->msg_flags & MSG_DONTWAIT, 2218 &err, 0); 2219 } else { 2220 /* Keep two messages in the pipe so it schedules better */ 2221 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2222 2223 /* allow fallback to order-0 allocations */ 2224 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2225 2226 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2227 2228 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2229 2230 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2231 msg->msg_flags & MSG_DONTWAIT, &err, 2232 get_order(UNIX_SKB_FRAGS_SZ)); 2233 } 2234 if (!skb) 2235 goto out_err; 2236 2237 /* Only send the fds in the first buffer */ 2238 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2239 if (err < 0) { 2240 kfree_skb(skb); 2241 goto out_err; 2242 } 2243 fds_sent = true; 2244 2245 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2246 err = skb_splice_from_iter(skb, &msg->msg_iter, size, 2247 sk->sk_allocation); 2248 if (err < 0) { 2249 kfree_skb(skb); 2250 goto out_err; 2251 } 2252 size = err; 2253 refcount_add(size, &sk->sk_wmem_alloc); 2254 } else { 2255 skb_put(skb, size - data_len); 2256 skb->data_len = data_len; 2257 skb->len = size; 2258 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2259 if (err) { 2260 kfree_skb(skb); 2261 goto out_err; 2262 } 2263 } 2264 2265 unix_state_lock(other); 2266 2267 if (sock_flag(other, SOCK_DEAD) || 2268 (other->sk_shutdown & RCV_SHUTDOWN)) 2269 goto pipe_err_free; 2270 2271 maybe_add_creds(skb, sock, other); 2272 scm_stat_add(other, skb); 2273 skb_queue_tail(&other->sk_receive_queue, skb); 2274 unix_state_unlock(other); 2275 other->sk_data_ready(other); 2276 sent += size; 2277 } 2278 2279 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2280 if (msg->msg_flags & MSG_OOB) { 2281 err = queue_oob(sock, msg, other, &scm, fds_sent); 2282 if (err) 2283 goto out_err; 2284 sent++; 2285 } 2286 #endif 2287 2288 scm_destroy(&scm); 2289 2290 return sent; 2291 2292 pipe_err_free: 2293 unix_state_unlock(other); 2294 kfree_skb(skb); 2295 pipe_err: 2296 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2297 send_sig(SIGPIPE, current, 0); 2298 err = -EPIPE; 2299 out_err: 2300 scm_destroy(&scm); 2301 return sent ? 
: err; 2302 } 2303 2304 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2305 size_t len) 2306 { 2307 int err; 2308 struct sock *sk = sock->sk; 2309 2310 err = sock_error(sk); 2311 if (err) 2312 return err; 2313 2314 if (sk->sk_state != TCP_ESTABLISHED) 2315 return -ENOTCONN; 2316 2317 if (msg->msg_namelen) 2318 msg->msg_namelen = 0; 2319 2320 return unix_dgram_sendmsg(sock, msg, len); 2321 } 2322 2323 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2324 size_t size, int flags) 2325 { 2326 struct sock *sk = sock->sk; 2327 2328 if (sk->sk_state != TCP_ESTABLISHED) 2329 return -ENOTCONN; 2330 2331 return unix_dgram_recvmsg(sock, msg, size, flags); 2332 } 2333 2334 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2335 { 2336 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2337 2338 if (addr) { 2339 msg->msg_namelen = addr->len; 2340 memcpy(msg->msg_name, addr->name, addr->len); 2341 } 2342 } 2343 2344 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2345 int flags) 2346 { 2347 struct scm_cookie scm; 2348 struct socket *sock = sk->sk_socket; 2349 struct unix_sock *u = unix_sk(sk); 2350 struct sk_buff *skb, *last; 2351 long timeo; 2352 int skip; 2353 int err; 2354 2355 err = -EOPNOTSUPP; 2356 if (flags&MSG_OOB) 2357 goto out; 2358 2359 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2360 2361 do { 2362 mutex_lock(&u->iolock); 2363 2364 skip = sk_peek_offset(sk, flags); 2365 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2366 &skip, &err, &last); 2367 if (skb) { 2368 if (!(flags & MSG_PEEK)) 2369 scm_stat_del(sk, skb); 2370 break; 2371 } 2372 2373 mutex_unlock(&u->iolock); 2374 2375 if (err != -EAGAIN) 2376 break; 2377 } while (timeo && 2378 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2379 &err, &timeo, last)); 2380 2381 if (!skb) { /* implies iolock unlocked */ 2382 unix_state_lock(sk); 2383 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2384 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2385 (sk->sk_shutdown & RCV_SHUTDOWN)) 2386 err = 0; 2387 unix_state_unlock(sk); 2388 goto out; 2389 } 2390 2391 if (wq_has_sleeper(&u->peer_wait)) 2392 wake_up_interruptible_sync_poll(&u->peer_wait, 2393 EPOLLOUT | EPOLLWRNORM | 2394 EPOLLWRBAND); 2395 2396 if (msg->msg_name) 2397 unix_copy_addr(msg, skb->sk); 2398 2399 if (size > skb->len - skip) 2400 size = skb->len - skip; 2401 else if (size < skb->len - skip) 2402 msg->msg_flags |= MSG_TRUNC; 2403 2404 err = skb_copy_datagram_msg(skb, skip, msg, size); 2405 if (err) 2406 goto out_free; 2407 2408 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2409 __sock_recv_timestamp(msg, sk, skb); 2410 2411 memset(&scm, 0, sizeof(scm)); 2412 2413 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2414 unix_set_secdata(&scm, skb); 2415 2416 if (!(flags & MSG_PEEK)) { 2417 if (UNIXCB(skb).fp) 2418 unix_detach_fds(&scm, skb); 2419 2420 sk_peek_offset_bwd(sk, skb->len); 2421 } else { 2422 /* It is questionable: on PEEK we could: 2423 - do not return fds - good, but too simple 8) 2424 - return fds, and do not return them on read (old strategy, 2425 apparently wrong) 2426 - clone fds (I chose it for now, it is the most universal 2427 solution) 2428 2429 POSIX 1003.1g does not actually define this clearly 2430 at all. POSIX 1003.1g doesn't define a lot of things 2431 clearly however! 
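	   ("Clone" here means unix_peek_fds() leaves the descriptors attached
	   to the skb and hands the peeking reader duplicate references to the
	   same open files, so every MSG_PEEK yields a usable set of fds.)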
2432 2433 */ 2434 2435 sk_peek_offset_fwd(sk, size); 2436 2437 if (UNIXCB(skb).fp) 2438 unix_peek_fds(&scm, skb); 2439 } 2440 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2441 2442 scm_recv_unix(sock, msg, &scm, flags); 2443 2444 out_free: 2445 skb_free_datagram(sk, skb); 2446 mutex_unlock(&u->iolock); 2447 out: 2448 return err; 2449 } 2450 2451 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2452 int flags) 2453 { 2454 struct sock *sk = sock->sk; 2455 2456 #ifdef CONFIG_BPF_SYSCALL 2457 const struct proto *prot = READ_ONCE(sk->sk_prot); 2458 2459 if (prot != &unix_dgram_proto) 2460 return prot->recvmsg(sk, msg, size, flags, NULL); 2461 #endif 2462 return __unix_dgram_recvmsg(sk, msg, size, flags); 2463 } 2464 2465 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2466 { 2467 struct unix_sock *u = unix_sk(sk); 2468 struct sk_buff *skb; 2469 int err; 2470 2471 mutex_lock(&u->iolock); 2472 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2473 mutex_unlock(&u->iolock); 2474 if (!skb) 2475 return err; 2476 2477 return recv_actor(sk, skb); 2478 } 2479 2480 /* 2481 * Sleep until more data has arrived. But check for races.. 2482 */ 2483 static long unix_stream_data_wait(struct sock *sk, long timeo, 2484 struct sk_buff *last, unsigned int last_len, 2485 bool freezable) 2486 { 2487 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; 2488 struct sk_buff *tail; 2489 DEFINE_WAIT(wait); 2490 2491 unix_state_lock(sk); 2492 2493 for (;;) { 2494 prepare_to_wait(sk_sleep(sk), &wait, state); 2495 2496 tail = skb_peek_tail(&sk->sk_receive_queue); 2497 if (tail != last || 2498 (tail && tail->len != last_len) || 2499 sk->sk_err || 2500 (sk->sk_shutdown & RCV_SHUTDOWN) || 2501 signal_pending(current) || 2502 !timeo) 2503 break; 2504 2505 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2506 unix_state_unlock(sk); 2507 timeo = schedule_timeout(timeo); 2508 unix_state_lock(sk); 2509 2510 if (sock_flag(sk, SOCK_DEAD)) 2511 break; 2512 2513 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2514 } 2515 2516 finish_wait(sk_sleep(sk), &wait); 2517 unix_state_unlock(sk); 2518 return timeo; 2519 } 2520 2521 static unsigned int unix_skb_len(const struct sk_buff *skb) 2522 { 2523 return skb->len - UNIXCB(skb).consumed; 2524 } 2525 2526 struct unix_stream_read_state { 2527 int (*recv_actor)(struct sk_buff *, int, int, 2528 struct unix_stream_read_state *); 2529 struct socket *socket; 2530 struct msghdr *msg; 2531 struct pipe_inode_info *pipe; 2532 size_t size; 2533 int flags; 2534 unsigned int splice_flags; 2535 }; 2536 2537 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2538 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2539 { 2540 struct socket *sock = state->socket; 2541 struct sock *sk = sock->sk; 2542 struct unix_sock *u = unix_sk(sk); 2543 int chunk = 1; 2544 struct sk_buff *oob_skb; 2545 2546 mutex_lock(&u->iolock); 2547 unix_state_lock(sk); 2548 spin_lock(&sk->sk_receive_queue.lock); 2549 2550 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2551 spin_unlock(&sk->sk_receive_queue.lock); 2552 unix_state_unlock(sk); 2553 mutex_unlock(&u->iolock); 2554 return -EINVAL; 2555 } 2556 2557 oob_skb = u->oob_skb; 2558 2559 if (!(state->flags & MSG_PEEK)) 2560 WRITE_ONCE(u->oob_skb, NULL); 2561 else 2562 skb_get(oob_skb); 2563 2564 spin_unlock(&sk->sk_receive_queue.lock); 2565 unix_state_unlock(sk); 2566 2567 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2568 2569 if (!(state->flags & MSG_PEEK)) 2570 UNIXCB(oob_skb).consumed += 1; 2571 2572 
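	/* Drop the extra reference: the one skb_get() took above for
	 * MSG_PEEK, or the one u->oob_skb held before it was cleared.
	 */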
consume_skb(oob_skb); 2573 2574 mutex_unlock(&u->iolock); 2575 2576 if (chunk < 0) 2577 return -EFAULT; 2578 2579 state->msg->msg_flags |= MSG_OOB; 2580 return 1; 2581 } 2582 2583 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2584 int flags, int copied) 2585 { 2586 struct unix_sock *u = unix_sk(sk); 2587 2588 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2589 skb_unlink(skb, &sk->sk_receive_queue); 2590 consume_skb(skb); 2591 skb = NULL; 2592 } else { 2593 struct sk_buff *unlinked_skb = NULL; 2594 2595 spin_lock(&sk->sk_receive_queue.lock); 2596 2597 if (skb == u->oob_skb) { 2598 if (copied) { 2599 skb = NULL; 2600 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2601 if (!(flags & MSG_PEEK)) { 2602 WRITE_ONCE(u->oob_skb, NULL); 2603 consume_skb(skb); 2604 } 2605 } else if (flags & MSG_PEEK) { 2606 skb = NULL; 2607 } else { 2608 __skb_unlink(skb, &sk->sk_receive_queue); 2609 WRITE_ONCE(u->oob_skb, NULL); 2610 unlinked_skb = skb; 2611 skb = skb_peek(&sk->sk_receive_queue); 2612 } 2613 } 2614 2615 spin_unlock(&sk->sk_receive_queue.lock); 2616 2617 if (unlinked_skb) { 2618 WARN_ON_ONCE(skb_unref(unlinked_skb)); 2619 kfree_skb(unlinked_skb); 2620 } 2621 } 2622 return skb; 2623 } 2624 #endif 2625 2626 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2627 { 2628 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2629 return -ENOTCONN; 2630 2631 return unix_read_skb(sk, recv_actor); 2632 } 2633 2634 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2635 bool freezable) 2636 { 2637 struct scm_cookie scm; 2638 struct socket *sock = state->socket; 2639 struct sock *sk = sock->sk; 2640 struct unix_sock *u = unix_sk(sk); 2641 int copied = 0; 2642 int flags = state->flags; 2643 int noblock = flags & MSG_DONTWAIT; 2644 bool check_creds = false; 2645 int target; 2646 int err = 0; 2647 long timeo; 2648 int skip; 2649 size_t size = state->size; 2650 unsigned int last_len; 2651 2652 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2653 err = -EINVAL; 2654 goto out; 2655 } 2656 2657 if (unlikely(flags & MSG_OOB)) { 2658 err = -EOPNOTSUPP; 2659 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2660 err = unix_stream_recv_urg(state); 2661 #endif 2662 goto out; 2663 } 2664 2665 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2666 timeo = sock_rcvtimeo(sk, noblock); 2667 2668 memset(&scm, 0, sizeof(scm)); 2669 2670 /* Lock the socket to prevent queue disordering 2671 * while sleeps in memcpy_tomsg 2672 */ 2673 mutex_lock(&u->iolock); 2674 2675 skip = max(sk_peek_offset(sk, flags), 0); 2676 2677 do { 2678 int chunk; 2679 bool drop_skb; 2680 struct sk_buff *skb, *last; 2681 2682 redo: 2683 unix_state_lock(sk); 2684 if (sock_flag(sk, SOCK_DEAD)) { 2685 err = -ECONNRESET; 2686 goto unlock; 2687 } 2688 last = skb = skb_peek(&sk->sk_receive_queue); 2689 last_len = last ? last->len : 0; 2690 2691 again: 2692 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2693 if (skb) { 2694 skb = manage_oob(skb, sk, flags, copied); 2695 if (!skb && copied) { 2696 unix_state_unlock(sk); 2697 break; 2698 } 2699 } 2700 #endif 2701 if (skb == NULL) { 2702 if (copied >= target) 2703 goto unlock; 2704 2705 /* 2706 * POSIX 1003.1g mandates this order. 
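 * A pending socket error must be reported before the EOF implied by
 * RCV_SHUTDOWN, hence sock_error() is checked first below.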
2707 */ 2708 2709 err = sock_error(sk); 2710 if (err) 2711 goto unlock; 2712 if (sk->sk_shutdown & RCV_SHUTDOWN) 2713 goto unlock; 2714 2715 unix_state_unlock(sk); 2716 if (!timeo) { 2717 err = -EAGAIN; 2718 break; 2719 } 2720 2721 mutex_unlock(&u->iolock); 2722 2723 timeo = unix_stream_data_wait(sk, timeo, last, 2724 last_len, freezable); 2725 2726 if (signal_pending(current)) { 2727 err = sock_intr_errno(timeo); 2728 scm_destroy(&scm); 2729 goto out; 2730 } 2731 2732 mutex_lock(&u->iolock); 2733 goto redo; 2734 unlock: 2735 unix_state_unlock(sk); 2736 break; 2737 } 2738 2739 while (skip >= unix_skb_len(skb)) { 2740 skip -= unix_skb_len(skb); 2741 last = skb; 2742 last_len = skb->len; 2743 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2744 if (!skb) 2745 goto again; 2746 } 2747 2748 unix_state_unlock(sk); 2749 2750 if (check_creds) { 2751 /* Never glue messages from different writers */ 2752 if (!unix_skb_scm_eq(skb, &scm)) 2753 break; 2754 } else if (test_bit(SOCK_PASSCRED, &sock->flags) || 2755 test_bit(SOCK_PASSPIDFD, &sock->flags)) { 2756 /* Copy credentials */ 2757 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2758 unix_set_secdata(&scm, skb); 2759 check_creds = true; 2760 } 2761 2762 /* Copy address just once */ 2763 if (state->msg && state->msg->msg_name) { 2764 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2765 state->msg->msg_name); 2766 unix_copy_addr(state->msg, skb->sk); 2767 sunaddr = NULL; 2768 } 2769 2770 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2771 skb_get(skb); 2772 chunk = state->recv_actor(skb, skip, chunk, state); 2773 drop_skb = !unix_skb_len(skb); 2774 /* skb is only safe to use if !drop_skb */ 2775 consume_skb(skb); 2776 if (chunk < 0) { 2777 if (copied == 0) 2778 copied = -EFAULT; 2779 break; 2780 } 2781 copied += chunk; 2782 size -= chunk; 2783 2784 if (drop_skb) { 2785 /* the skb was touched by a concurrent reader; 2786 * we should not expect anything from this skb 2787 * anymore and assume it invalid - we can be 2788 * sure it was dropped from the socket queue 2789 * 2790 * let's report a short read 2791 */ 2792 err = 0; 2793 break; 2794 } 2795 2796 /* Mark read part of skb as used */ 2797 if (!(flags & MSG_PEEK)) { 2798 UNIXCB(skb).consumed += chunk; 2799 2800 sk_peek_offset_bwd(sk, chunk); 2801 2802 if (UNIXCB(skb).fp) { 2803 scm_stat_del(sk, skb); 2804 unix_detach_fds(&scm, skb); 2805 } 2806 2807 if (unix_skb_len(skb)) 2808 break; 2809 2810 skb_unlink(skb, &sk->sk_receive_queue); 2811 consume_skb(skb); 2812 2813 if (scm.fp) 2814 break; 2815 } else { 2816 /* It is questionable, see note in unix_dgram_recvmsg. 2817 */ 2818 if (UNIXCB(skb).fp) 2819 unix_peek_fds(&scm, skb); 2820 2821 sk_peek_offset_fwd(sk, chunk); 2822 2823 if (UNIXCB(skb).fp) 2824 break; 2825 2826 skip = 0; 2827 last = skb; 2828 last_len = skb->len; 2829 unix_state_lock(sk); 2830 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2831 if (skb) 2832 goto again; 2833 unix_state_unlock(sk); 2834 break; 2835 } 2836 } while (size); 2837 2838 mutex_unlock(&u->iolock); 2839 if (state->msg) 2840 scm_recv_unix(sock, state->msg, &scm, flags); 2841 else 2842 scm_destroy(&scm); 2843 out: 2844 return copied ? 
: err; 2845 } 2846 2847 static int unix_stream_read_actor(struct sk_buff *skb, 2848 int skip, int chunk, 2849 struct unix_stream_read_state *state) 2850 { 2851 int ret; 2852 2853 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2854 state->msg, chunk); 2855 return ret ?: chunk; 2856 } 2857 2858 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2859 size_t size, int flags) 2860 { 2861 struct unix_stream_read_state state = { 2862 .recv_actor = unix_stream_read_actor, 2863 .socket = sk->sk_socket, 2864 .msg = msg, 2865 .size = size, 2866 .flags = flags 2867 }; 2868 2869 return unix_stream_read_generic(&state, true); 2870 } 2871 2872 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2873 size_t size, int flags) 2874 { 2875 struct unix_stream_read_state state = { 2876 .recv_actor = unix_stream_read_actor, 2877 .socket = sock, 2878 .msg = msg, 2879 .size = size, 2880 .flags = flags 2881 }; 2882 2883 #ifdef CONFIG_BPF_SYSCALL 2884 struct sock *sk = sock->sk; 2885 const struct proto *prot = READ_ONCE(sk->sk_prot); 2886 2887 if (prot != &unix_stream_proto) 2888 return prot->recvmsg(sk, msg, size, flags, NULL); 2889 #endif 2890 return unix_stream_read_generic(&state, true); 2891 } 2892 2893 static int unix_stream_splice_actor(struct sk_buff *skb, 2894 int skip, int chunk, 2895 struct unix_stream_read_state *state) 2896 { 2897 return skb_splice_bits(skb, state->socket->sk, 2898 UNIXCB(skb).consumed + skip, 2899 state->pipe, chunk, state->splice_flags); 2900 } 2901 2902 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2903 struct pipe_inode_info *pipe, 2904 size_t size, unsigned int flags) 2905 { 2906 struct unix_stream_read_state state = { 2907 .recv_actor = unix_stream_splice_actor, 2908 .socket = sock, 2909 .pipe = pipe, 2910 .size = size, 2911 .splice_flags = flags, 2912 }; 2913 2914 if (unlikely(*ppos)) 2915 return -ESPIPE; 2916 2917 if (sock->file->f_flags & O_NONBLOCK || 2918 flags & SPLICE_F_NONBLOCK) 2919 state.flags = MSG_DONTWAIT; 2920 2921 return unix_stream_read_generic(&state, false); 2922 } 2923 2924 static int unix_shutdown(struct socket *sock, int mode) 2925 { 2926 struct sock *sk = sock->sk; 2927 struct sock *other; 2928 2929 if (mode < SHUT_RD || mode > SHUT_RDWR) 2930 return -EINVAL; 2931 /* This maps: 2932 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2933 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 2934 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2935 */ 2936 ++mode; 2937 2938 unix_state_lock(sk); 2939 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); 2940 other = unix_peer(sk); 2941 if (other) 2942 sock_hold(other); 2943 unix_state_unlock(sk); 2944 sk->sk_state_change(sk); 2945 2946 if (other && 2947 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2948 2949 int peer_mode = 0; 2950 const struct proto *prot = READ_ONCE(other->sk_prot); 2951 2952 if (prot->unhash) 2953 prot->unhash(other); 2954 if (mode&RCV_SHUTDOWN) 2955 peer_mode |= SEND_SHUTDOWN; 2956 if (mode&SEND_SHUTDOWN) 2957 peer_mode |= RCV_SHUTDOWN; 2958 unix_state_lock(other); 2959 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); 2960 unix_state_unlock(other); 2961 other->sk_state_change(other); 2962 if (peer_mode == SHUTDOWN_MASK) 2963 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2964 else if (peer_mode & RCV_SHUTDOWN) 2965 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2966 } 2967 if (other) 2968 sock_put(other); 2969 2970 return 0; 2971 } 2972 2973 long unix_inq_len(struct sock *sk) 2974 { 2975 struct sk_buff *skb; 2976 long amount = 
0; 2977 2978 if (sk->sk_state == TCP_LISTEN) 2979 return -EINVAL; 2980 2981 spin_lock(&sk->sk_receive_queue.lock); 2982 if (sk->sk_type == SOCK_STREAM || 2983 sk->sk_type == SOCK_SEQPACKET) { 2984 skb_queue_walk(&sk->sk_receive_queue, skb) 2985 amount += unix_skb_len(skb); 2986 } else { 2987 skb = skb_peek(&sk->sk_receive_queue); 2988 if (skb) 2989 amount = skb->len; 2990 } 2991 spin_unlock(&sk->sk_receive_queue.lock); 2992 2993 return amount; 2994 } 2995 EXPORT_SYMBOL_GPL(unix_inq_len); 2996 2997 long unix_outq_len(struct sock *sk) 2998 { 2999 return sk_wmem_alloc_get(sk); 3000 } 3001 EXPORT_SYMBOL_GPL(unix_outq_len); 3002 3003 static int unix_open_file(struct sock *sk) 3004 { 3005 struct path path; 3006 struct file *f; 3007 int fd; 3008 3009 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3010 return -EPERM; 3011 3012 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3013 return -ENOENT; 3014 3015 path = unix_sk(sk)->path; 3016 if (!path.dentry) 3017 return -ENOENT; 3018 3019 path_get(&path); 3020 3021 fd = get_unused_fd_flags(O_CLOEXEC); 3022 if (fd < 0) 3023 goto out; 3024 3025 f = dentry_open(&path, O_PATH, current_cred()); 3026 if (IS_ERR(f)) { 3027 put_unused_fd(fd); 3028 fd = PTR_ERR(f); 3029 goto out; 3030 } 3031 3032 fd_install(fd, f); 3033 out: 3034 path_put(&path); 3035 3036 return fd; 3037 } 3038 3039 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3040 { 3041 struct sock *sk = sock->sk; 3042 long amount = 0; 3043 int err; 3044 3045 switch (cmd) { 3046 case SIOCOUTQ: 3047 amount = unix_outq_len(sk); 3048 err = put_user(amount, (int __user *)arg); 3049 break; 3050 case SIOCINQ: 3051 amount = unix_inq_len(sk); 3052 if (amount < 0) 3053 err = amount; 3054 else 3055 err = put_user(amount, (int __user *)arg); 3056 break; 3057 case SIOCUNIXFILE: 3058 err = unix_open_file(sk); 3059 break; 3060 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3061 case SIOCATMARK: 3062 { 3063 struct sk_buff *skb; 3064 int answ = 0; 3065 3066 skb = skb_peek(&sk->sk_receive_queue); 3067 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3068 answ = 1; 3069 err = put_user(answ, (int __user *)arg); 3070 } 3071 break; 3072 #endif 3073 default: 3074 err = -ENOIOCTLCMD; 3075 break; 3076 } 3077 return err; 3078 } 3079 3080 #ifdef CONFIG_COMPAT 3081 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3082 { 3083 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3084 } 3085 #endif 3086 3087 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3088 { 3089 struct sock *sk = sock->sk; 3090 __poll_t mask; 3091 u8 shutdown; 3092 3093 sock_poll_wait(file, sock, wait); 3094 mask = 0; 3095 shutdown = READ_ONCE(sk->sk_shutdown); 3096 3097 /* exceptional events? */ 3098 if (READ_ONCE(sk->sk_err)) 3099 mask |= EPOLLERR; 3100 if (shutdown == SHUTDOWN_MASK) 3101 mask |= EPOLLHUP; 3102 if (shutdown & RCV_SHUTDOWN) 3103 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3104 3105 /* readable? 
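 * Either data queued on sk_receive_queue or a BPF/psock-redirected skb
 * reported by sk_is_readable() counts, and a pending OOB byte additionally
 * raises EPOLLPRI below.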
*/ 3106 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3107 mask |= EPOLLIN | EPOLLRDNORM; 3108 if (sk_is_readable(sk)) 3109 mask |= EPOLLIN | EPOLLRDNORM; 3110 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3111 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3112 mask |= EPOLLPRI; 3113 #endif 3114 3115 /* Connection-based need to check for termination and startup */ 3116 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3117 sk->sk_state == TCP_CLOSE) 3118 mask |= EPOLLHUP; 3119 3120 /* 3121 * we set writable also when the other side has shut down the 3122 * connection. This prevents stuck sockets. 3123 */ 3124 if (unix_writable(sk)) 3125 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3126 3127 return mask; 3128 } 3129 3130 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3131 poll_table *wait) 3132 { 3133 struct sock *sk = sock->sk, *other; 3134 unsigned int writable; 3135 __poll_t mask; 3136 u8 shutdown; 3137 3138 sock_poll_wait(file, sock, wait); 3139 mask = 0; 3140 shutdown = READ_ONCE(sk->sk_shutdown); 3141 3142 /* exceptional events? */ 3143 if (READ_ONCE(sk->sk_err) || 3144 !skb_queue_empty_lockless(&sk->sk_error_queue)) 3145 mask |= EPOLLERR | 3146 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3147 3148 if (shutdown & RCV_SHUTDOWN) 3149 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3150 if (shutdown == SHUTDOWN_MASK) 3151 mask |= EPOLLHUP; 3152 3153 /* readable? */ 3154 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3155 mask |= EPOLLIN | EPOLLRDNORM; 3156 if (sk_is_readable(sk)) 3157 mask |= EPOLLIN | EPOLLRDNORM; 3158 3159 /* Connection-based need to check for termination and startup */ 3160 if (sk->sk_type == SOCK_SEQPACKET) { 3161 if (sk->sk_state == TCP_CLOSE) 3162 mask |= EPOLLHUP; 3163 /* connection hasn't started yet? */ 3164 if (sk->sk_state == TCP_SYN_SENT) 3165 return mask; 3166 } 3167 3168 /* No write status requested, avoid expensive OUT tests. 
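 * (the writable check below takes unix_state_lock() and inspects the peer,
 *  so skip it unless EPOLLOUT-type events were actually requested)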
*/ 3169 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3170 return mask; 3171 3172 writable = unix_writable(sk); 3173 if (writable) { 3174 unix_state_lock(sk); 3175 3176 other = unix_peer(sk); 3177 if (other && unix_peer(other) != sk && 3178 unix_recvq_full_lockless(other) && 3179 unix_dgram_peer_wake_me(sk, other)) 3180 writable = 0; 3181 3182 unix_state_unlock(sk); 3183 } 3184 3185 if (writable) 3186 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3187 else 3188 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3189 3190 return mask; 3191 } 3192 3193 #ifdef CONFIG_PROC_FS 3194 3195 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3196 3197 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3198 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3199 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3200 3201 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3202 { 3203 unsigned long offset = get_offset(*pos); 3204 unsigned long bucket = get_bucket(*pos); 3205 unsigned long count = 0; 3206 struct sock *sk; 3207 3208 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3209 sk; sk = sk_next(sk)) { 3210 if (++count == offset) 3211 break; 3212 } 3213 3214 return sk; 3215 } 3216 3217 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3218 { 3219 unsigned long bucket = get_bucket(*pos); 3220 struct net *net = seq_file_net(seq); 3221 struct sock *sk; 3222 3223 while (bucket < UNIX_HASH_SIZE) { 3224 spin_lock(&net->unx.table.locks[bucket]); 3225 3226 sk = unix_from_bucket(seq, pos); 3227 if (sk) 3228 return sk; 3229 3230 spin_unlock(&net->unx.table.locks[bucket]); 3231 3232 *pos = set_bucket_offset(++bucket, 1); 3233 } 3234 3235 return NULL; 3236 } 3237 3238 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3239 loff_t *pos) 3240 { 3241 unsigned long bucket = get_bucket(*pos); 3242 3243 sk = sk_next(sk); 3244 if (sk) 3245 return sk; 3246 3247 3248 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3249 3250 *pos = set_bucket_offset(++bucket, 1); 3251 3252 return unix_get_first(seq, pos); 3253 } 3254 3255 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3256 { 3257 if (!*pos) 3258 return SEQ_START_TOKEN; 3259 3260 return unix_get_first(seq, pos); 3261 } 3262 3263 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3264 { 3265 ++*pos; 3266 3267 if (v == SEQ_START_TOKEN) 3268 return unix_get_first(seq, pos); 3269 3270 return unix_get_next(seq, v, pos); 3271 } 3272 3273 static void unix_seq_stop(struct seq_file *seq, void *v) 3274 { 3275 struct sock *sk = v; 3276 3277 if (sk) 3278 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3279 } 3280 3281 static int unix_seq_show(struct seq_file *seq, void *v) 3282 { 3283 3284 if (v == SEQ_START_TOKEN) 3285 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3286 "Inode Path\n"); 3287 else { 3288 struct sock *s = v; 3289 struct unix_sock *u = unix_sk(s); 3290 unix_state_lock(s); 3291 3292 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3293 s, 3294 refcount_read(&s->sk_refcnt), 3295 0, 3296 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3297 s->sk_type, 3298 s->sk_socket ? 3299 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3300 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3301 sock_i_ino(s)); 3302 3303 if (u->addr) { // under a hash table lock here 3304 int i, len; 3305 seq_putc(seq, ' '); 3306 3307 i = 0; 3308 len = u->addr->len - 3309 offsetof(struct sockaddr_un, sun_path); 3310 if (u->addr->name->sun_path[0]) { 3311 len--; 3312 } else { 3313 seq_putc(seq, '@'); 3314 i++; 3315 } 3316 for ( ; i < len; i++) 3317 seq_putc(seq, u->addr->name->sun_path[i] ?: 3318 '@'); 3319 } 3320 unix_state_unlock(s); 3321 seq_putc(seq, '\n'); 3322 } 3323 3324 return 0; 3325 } 3326 3327 static const struct seq_operations unix_seq_ops = { 3328 .start = unix_seq_start, 3329 .next = unix_seq_next, 3330 .stop = unix_seq_stop, 3331 .show = unix_seq_show, 3332 }; 3333 3334 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3335 struct bpf_unix_iter_state { 3336 struct seq_net_private p; 3337 unsigned int cur_sk; 3338 unsigned int end_sk; 3339 unsigned int max_sk; 3340 struct sock **batch; 3341 bool st_bucket_done; 3342 }; 3343 3344 struct bpf_iter__unix { 3345 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3346 __bpf_md_ptr(struct unix_sock *, unix_sk); 3347 uid_t uid __aligned(8); 3348 }; 3349 3350 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3351 struct unix_sock *unix_sk, uid_t uid) 3352 { 3353 struct bpf_iter__unix ctx; 3354 3355 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3356 ctx.meta = meta; 3357 ctx.unix_sk = unix_sk; 3358 ctx.uid = uid; 3359 return bpf_iter_run_prog(prog, &ctx); 3360 } 3361 3362 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3363 3364 { 3365 struct bpf_unix_iter_state *iter = seq->private; 3366 unsigned int expected = 1; 3367 struct sock *sk; 3368 3369 sock_hold(start_sk); 3370 iter->batch[iter->end_sk++] = start_sk; 3371 3372 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3373 if (iter->end_sk < iter->max_sk) { 3374 sock_hold(sk); 3375 iter->batch[iter->end_sk++] = sk; 3376 } 3377 3378 expected++; 3379 } 3380 3381 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3382 3383 return expected; 3384 } 3385 3386 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3387 { 3388 while (iter->cur_sk < iter->end_sk) 3389 sock_put(iter->batch[iter->cur_sk++]); 3390 } 3391 3392 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3393 unsigned int new_batch_sz) 3394 { 3395 struct sock **new_batch; 3396 3397 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3398 GFP_USER | __GFP_NOWARN); 3399 if (!new_batch) 3400 return -ENOMEM; 3401 3402 bpf_iter_unix_put_batch(iter); 3403 kvfree(iter->batch); 3404 iter->batch = new_batch; 3405 iter->max_sk = new_batch_sz; 3406 3407 return 0; 3408 } 3409 3410 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3411 loff_t *pos) 3412 { 3413 struct bpf_unix_iter_state *iter = seq->private; 3414 unsigned int expected; 3415 bool resized = false; 3416 struct sock *sk; 3417 3418 if (iter->st_bucket_done) 3419 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3420 3421 again: 3422 /* Get a new batch */ 3423 iter->cur_sk = 0; 3424 iter->end_sk = 0; 3425 3426 sk = unix_get_first(seq, pos); 3427 if (!sk) 3428 return NULL; /* Done */ 3429 3430 expected = bpf_iter_unix_hold_batch(seq, sk); 3431 3432 if (iter->end_sk == expected) { 3433 iter->st_bucket_done = true; 3434 return sk; 3435 } 3436 3437 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3438 resized = true; 3439 goto again; 3440 } 3441 3442 return sk; 3443 } 3444 3445 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3446 { 3447 if (!*pos) 3448 return SEQ_START_TOKEN; 3449 3450 /* bpf iter does not support lseek, so it always 3451 * continue from where it was stop()-ped. 3452 */ 3453 return bpf_iter_unix_batch(seq, pos); 3454 } 3455 3456 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3457 { 3458 struct bpf_unix_iter_state *iter = seq->private; 3459 struct sock *sk; 3460 3461 /* Whenever seq_next() is called, the iter->cur_sk is 3462 * done with seq_show(), so advance to the next sk in 3463 * the batch. 3464 */ 3465 if (iter->cur_sk < iter->end_sk) 3466 sock_put(iter->batch[iter->cur_sk++]); 3467 3468 ++*pos; 3469 3470 if (iter->cur_sk < iter->end_sk) 3471 sk = iter->batch[iter->cur_sk]; 3472 else 3473 sk = bpf_iter_unix_batch(seq, pos); 3474 3475 return sk; 3476 } 3477 3478 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3479 { 3480 struct bpf_iter_meta meta; 3481 struct bpf_prog *prog; 3482 struct sock *sk = v; 3483 uid_t uid; 3484 bool slow; 3485 int ret; 3486 3487 if (v == SEQ_START_TOKEN) 3488 return 0; 3489 3490 slow = lock_sock_fast(sk); 3491 3492 if (unlikely(sk_unhashed(sk))) { 3493 ret = SEQ_SKIP; 3494 goto unlock; 3495 } 3496 3497 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3498 meta.seq = seq; 3499 prog = bpf_iter_get_info(&meta, false); 3500 ret = unix_prog_seq_show(prog, &meta, v, uid); 3501 unlock: 3502 unlock_sock_fast(sk, slow); 3503 return ret; 3504 } 3505 3506 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3507 { 3508 struct bpf_unix_iter_state *iter = seq->private; 3509 struct bpf_iter_meta meta; 3510 struct bpf_prog *prog; 3511 3512 if (!v) { 3513 meta.seq = seq; 3514 prog = bpf_iter_get_info(&meta, true); 3515 if (prog) 3516 (void)unix_prog_seq_show(prog, &meta, v, 0); 3517 } 3518 3519 if (iter->cur_sk < iter->end_sk) 3520 bpf_iter_unix_put_batch(iter); 3521 } 3522 3523 static const struct seq_operations bpf_iter_unix_seq_ops = { 3524 .start = bpf_iter_unix_seq_start, 3525 .next = bpf_iter_unix_seq_next, 3526 .stop = bpf_iter_unix_seq_stop, 3527 .show = bpf_iter_unix_seq_show, 3528 }; 3529 #endif 3530 #endif 3531 3532 static const struct net_proto_family unix_family_ops = { 3533 .family = PF_UNIX, 3534 .create = unix_create, 3535 .owner = THIS_MODULE, 3536 }; 3537 3538 3539 static int __net_init unix_net_init(struct net *net) 3540 { 3541 int i; 3542 3543 net->unx.sysctl_max_dgram_qlen = 10; 3544 if (unix_sysctl_register(net)) 3545 goto out; 3546 3547 #ifdef CONFIG_PROC_FS 3548 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3549 sizeof(struct seq_net_private))) 3550 goto err_sysctl; 3551 #endif 3552 3553 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3554 sizeof(spinlock_t), GFP_KERNEL); 3555 if (!net->unx.table.locks) 3556 goto err_proc; 3557 3558 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3559 sizeof(struct hlist_head), 3560 GFP_KERNEL); 3561 if (!net->unx.table.buckets) 3562 goto free_locks; 3563 3564 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3565 spin_lock_init(&net->unx.table.locks[i]); 3566 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3567 } 3568 3569 return 0; 3570 3571 free_locks: 3572 kvfree(net->unx.table.locks); 3573 err_proc: 3574 #ifdef CONFIG_PROC_FS 3575 remove_proc_entry("unix", net->proc_net); 3576 err_sysctl: 3577 #endif 3578 unix_sysctl_unregister(net); 3579 out: 3580 return -ENOMEM; 3581 } 3582 3583 static void __net_exit unix_net_exit(struct net *net) 3584 { 3585 
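	/* Free the per-netns hash table and drop the sysctl and procfs
	 * entries registered in unix_net_init().
	 */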
kvfree(net->unx.table.buckets); 3586 kvfree(net->unx.table.locks); 3587 unix_sysctl_unregister(net); 3588 remove_proc_entry("unix", net->proc_net); 3589 } 3590 3591 static struct pernet_operations unix_net_ops = { 3592 .init = unix_net_init, 3593 .exit = unix_net_exit, 3594 }; 3595 3596 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3597 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3598 struct unix_sock *unix_sk, uid_t uid) 3599 3600 #define INIT_BATCH_SZ 16 3601 3602 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3603 { 3604 struct bpf_unix_iter_state *iter = priv_data; 3605 int err; 3606 3607 err = bpf_iter_init_seq_net(priv_data, aux); 3608 if (err) 3609 return err; 3610 3611 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3612 if (err) { 3613 bpf_iter_fini_seq_net(priv_data); 3614 return err; 3615 } 3616 3617 return 0; 3618 } 3619 3620 static void bpf_iter_fini_unix(void *priv_data) 3621 { 3622 struct bpf_unix_iter_state *iter = priv_data; 3623 3624 bpf_iter_fini_seq_net(priv_data); 3625 kvfree(iter->batch); 3626 } 3627 3628 static const struct bpf_iter_seq_info unix_seq_info = { 3629 .seq_ops = &bpf_iter_unix_seq_ops, 3630 .init_seq_private = bpf_iter_init_unix, 3631 .fini_seq_private = bpf_iter_fini_unix, 3632 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3633 }; 3634 3635 static const struct bpf_func_proto * 3636 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3637 const struct bpf_prog *prog) 3638 { 3639 switch (func_id) { 3640 case BPF_FUNC_setsockopt: 3641 return &bpf_sk_setsockopt_proto; 3642 case BPF_FUNC_getsockopt: 3643 return &bpf_sk_getsockopt_proto; 3644 default: 3645 return NULL; 3646 } 3647 } 3648 3649 static struct bpf_iter_reg unix_reg_info = { 3650 .target = "unix", 3651 .ctx_arg_info_size = 1, 3652 .ctx_arg_info = { 3653 { offsetof(struct bpf_iter__unix, unix_sk), 3654 PTR_TO_BTF_ID_OR_NULL }, 3655 }, 3656 .get_func_proto = bpf_iter_unix_get_func_proto, 3657 .seq_info = &unix_seq_info, 3658 }; 3659 3660 static void __init bpf_iter_register(void) 3661 { 3662 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3663 if (bpf_iter_reg_target(&unix_reg_info)) 3664 pr_warn("Warning: could not register bpf iterator unix\n"); 3665 } 3666 #endif 3667 3668 static int __init af_unix_init(void) 3669 { 3670 int i, rc = -1; 3671 3672 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3673 3674 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3675 spin_lock_init(&bsd_socket_locks[i]); 3676 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3677 } 3678 3679 rc = proto_register(&unix_dgram_proto, 1); 3680 if (rc != 0) { 3681 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3682 goto out; 3683 } 3684 3685 rc = proto_register(&unix_stream_proto, 1); 3686 if (rc != 0) { 3687 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3688 proto_unregister(&unix_dgram_proto); 3689 goto out; 3690 } 3691 3692 sock_register(&unix_family_ops); 3693 register_pernet_subsys(&unix_net_ops); 3694 unix_bpf_build_proto(); 3695 3696 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3697 bpf_iter_register(); 3698 #endif 3699 3700 out: 3701 return rc; 3702 } 3703 3704 static void __exit af_unix_exit(void) 3705 { 3706 sock_unregister(PF_UNIX); 3707 proto_unregister(&unix_dgram_proto); 3708 proto_unregister(&unix_stream_proto); 3709 unregister_pernet_subsys(&unix_net_ops); 3710 } 3711 3712 /* Earlier than 
device_initcall() so that other drivers invoking 3713 request_module() don't end up in a loop when modprobe tries 3714 to use a UNIX socket. But later than subsys_initcall() because 3715 we depend on stuff initialised there */ 3716 fs_initcall(af_unix_init); 3717 module_exit(af_unix_exit); 3718 3719 MODULE_LICENSE("GPL"); 3720 MODULE_ALIAS_NETPROTO(PF_UNIX); 3721
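
/*
 * Illustrative userspace sketch (assumed names, not part of the kernel
 * build): passing a file descriptor over a connected AF_UNIX socket with
 * SCM_RIGHTS, which the send/receive paths above hand off to
 * unix_attach_fds()/unix_detach_fds().
 *
 *	union { struct cmsghdr align; char buf[CMSG_SPACE(sizeof(int))]; } u;
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = u.buf,
 *			      .msg_controllen = sizeof(u.buf) };
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(connected_fd, &msg, 0);
 */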