// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of hashed socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */

/* Prefix every pr_*() message from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/splice.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

/* Count of unix sockets; incremented in unix_create1() and decremented
 * in unix_sock_destructor() (and on unix_create1() failure).
 */
static atomic_long_t unix_nr_socks;
/* Hash buckets of sockets bound to a filesystem path, indexed by
 * sk->sk_hash; looked up by inode in unix_find_socket_byinode().
 * bsd_socket_locks[n] protects bsd_socket_buckets[n].
 */
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
129 */ 130 131 static unsigned int unix_unbound_hash(struct sock *sk) 132 { 133 unsigned long hash = (unsigned long)sk; 134 135 hash ^= hash >> 16; 136 hash ^= hash >> 8; 137 hash ^= sk->sk_type; 138 139 return hash & UNIX_HASH_MOD; 140 } 141 142 static unsigned int unix_bsd_hash(struct inode *i) 143 { 144 return i->i_ino & UNIX_HASH_MOD; 145 } 146 147 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, 148 int addr_len, int type) 149 { 150 __wsum csum = csum_partial(sunaddr, addr_len, 0); 151 unsigned int hash; 152 153 hash = (__force unsigned int)csum_fold(csum); 154 hash ^= hash >> 8; 155 hash ^= type; 156 157 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); 158 } 159 160 static void unix_table_double_lock(struct net *net, 161 unsigned int hash1, unsigned int hash2) 162 { 163 if (hash1 == hash2) { 164 spin_lock(&net->unx.table.locks[hash1]); 165 return; 166 } 167 168 if (hash1 > hash2) 169 swap(hash1, hash2); 170 171 spin_lock(&net->unx.table.locks[hash1]); 172 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); 173 } 174 175 static void unix_table_double_unlock(struct net *net, 176 unsigned int hash1, unsigned int hash2) 177 { 178 if (hash1 == hash2) { 179 spin_unlock(&net->unx.table.locks[hash1]); 180 return; 181 } 182 183 spin_unlock(&net->unx.table.locks[hash1]); 184 spin_unlock(&net->unx.table.locks[hash2]); 185 } 186 187 #ifdef CONFIG_SECURITY_NETWORK 188 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 189 { 190 UNIXCB(skb).secid = scm->secid; 191 } 192 193 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 194 { 195 scm->secid = UNIXCB(skb).secid; 196 } 197 198 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 199 { 200 return (scm->secid == UNIXCB(skb).secid); 201 } 202 #else 203 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 204 { } 205 206 static inline void unix_set_secdata(struct scm_cookie *scm, 
struct sk_buff *skb) 207 { } 208 209 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 210 { 211 return true; 212 } 213 #endif /* CONFIG_SECURITY_NETWORK */ 214 215 #define unix_peer(sk) (unix_sk(sk)->peer) 216 217 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 218 { 219 return unix_peer(osk) == sk; 220 } 221 222 static inline int unix_may_send(struct sock *sk, struct sock *osk) 223 { 224 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 225 } 226 227 static inline int unix_recvq_full(const struct sock *sk) 228 { 229 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 230 } 231 232 static inline int unix_recvq_full_lockless(const struct sock *sk) 233 { 234 return skb_queue_len_lockless(&sk->sk_receive_queue) > 235 READ_ONCE(sk->sk_max_ack_backlog); 236 } 237 238 struct sock *unix_peer_get(struct sock *s) 239 { 240 struct sock *peer; 241 242 unix_state_lock(s); 243 peer = unix_peer(s); 244 if (peer) 245 sock_hold(peer); 246 unix_state_unlock(s); 247 return peer; 248 } 249 EXPORT_SYMBOL_GPL(unix_peer_get); 250 251 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 252 int addr_len) 253 { 254 struct unix_address *addr; 255 256 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 257 if (!addr) 258 return NULL; 259 260 refcount_set(&addr->refcnt, 1); 261 addr->len = addr_len; 262 memcpy(addr->name, sunaddr, addr_len); 263 264 return addr; 265 } 266 267 static inline void unix_release_addr(struct unix_address *addr) 268 { 269 if (refcount_dec_and_test(&addr->refcnt)) 270 kfree(addr); 271 } 272 273 /* 274 * Check unix socket name: 275 * - should be not zero length. 276 * - if started by not zero, should be NULL terminated (FS object) 277 * - if started by zero, it is abstract name. 
278 */ 279 280 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 281 { 282 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 283 addr_len > sizeof(*sunaddr)) 284 return -EINVAL; 285 286 if (sunaddr->sun_family != AF_UNIX) 287 return -EINVAL; 288 289 return 0; 290 } 291 292 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 293 { 294 /* This may look like an off by one error but it is a bit more 295 * subtle. 108 is the longest valid AF_UNIX path for a binding. 296 * sun_path[108] doesn't as such exist. However in kernel space 297 * we are guaranteed that it is a valid memory location in our 298 * kernel address buffer because syscall functions always pass 299 * a pointer of struct sockaddr_storage which has a bigger buffer 300 * than 108. 301 */ 302 ((char *)sunaddr)[addr_len] = 0; 303 } 304 305 static void __unix_remove_socket(struct sock *sk) 306 { 307 sk_del_node_init(sk); 308 } 309 310 static void __unix_insert_socket(struct net *net, struct sock *sk) 311 { 312 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 313 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 314 } 315 316 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 317 struct unix_address *addr, unsigned int hash) 318 { 319 __unix_remove_socket(sk); 320 smp_store_release(&unix_sk(sk)->addr, addr); 321 322 sk->sk_hash = hash; 323 __unix_insert_socket(net, sk); 324 } 325 326 static void unix_remove_socket(struct net *net, struct sock *sk) 327 { 328 spin_lock(&net->unx.table.locks[sk->sk_hash]); 329 __unix_remove_socket(sk); 330 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 331 } 332 333 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 334 { 335 spin_lock(&net->unx.table.locks[sk->sk_hash]); 336 __unix_insert_socket(net, sk); 337 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 338 } 339 340 static void unix_insert_bsd_socket(struct sock *sk) 341 { 342 spin_lock(&bsd_socket_locks[sk->sk_hash]); 343 
sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 344 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 345 } 346 347 static void unix_remove_bsd_socket(struct sock *sk) 348 { 349 if (!hlist_unhashed(&sk->sk_bind_node)) { 350 spin_lock(&bsd_socket_locks[sk->sk_hash]); 351 __sk_del_bind_node(sk); 352 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 353 354 sk_node_init(&sk->sk_bind_node); 355 } 356 } 357 358 static struct sock *__unix_find_socket_byname(struct net *net, 359 struct sockaddr_un *sunname, 360 int len, unsigned int hash) 361 { 362 struct sock *s; 363 364 sk_for_each(s, &net->unx.table.buckets[hash]) { 365 struct unix_sock *u = unix_sk(s); 366 367 if (u->addr->len == len && 368 !memcmp(u->addr->name, sunname, len)) 369 return s; 370 } 371 return NULL; 372 } 373 374 static inline struct sock *unix_find_socket_byname(struct net *net, 375 struct sockaddr_un *sunname, 376 int len, unsigned int hash) 377 { 378 struct sock *s; 379 380 spin_lock(&net->unx.table.locks[hash]); 381 s = __unix_find_socket_byname(net, sunname, len, hash); 382 if (s) 383 sock_hold(s); 384 spin_unlock(&net->unx.table.locks[hash]); 385 return s; 386 } 387 388 static struct sock *unix_find_socket_byinode(struct inode *i) 389 { 390 unsigned int hash = unix_bsd_hash(i); 391 struct sock *s; 392 393 spin_lock(&bsd_socket_locks[hash]); 394 sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 395 struct dentry *dentry = unix_sk(s)->path.dentry; 396 397 if (dentry && d_backing_inode(dentry) == i) { 398 sock_hold(s); 399 spin_unlock(&bsd_socket_locks[hash]); 400 return s; 401 } 402 } 403 spin_unlock(&bsd_socket_locks[hash]); 404 return NULL; 405 } 406 407 /* Support code for asymmetrically connected dgram sockets 408 * 409 * If a datagram socket is connected to a socket not itself connected 410 * to the first socket (eg, /dev/log), clients may only enqueue more 411 * messages if the present receive queue of the server socket is not 412 * "too large". 
 * This means there's a second writability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
430 */ 431 432 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 433 void *key) 434 { 435 struct unix_sock *u; 436 wait_queue_head_t *u_sleep; 437 438 u = container_of(q, struct unix_sock, peer_wake); 439 440 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 441 q); 442 u->peer_wake.private = NULL; 443 444 /* relaying can only happen while the wq still exists */ 445 u_sleep = sk_sleep(&u->sk); 446 if (u_sleep) 447 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 448 449 return 0; 450 } 451 452 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 453 { 454 struct unix_sock *u, *u_other; 455 int rc; 456 457 u = unix_sk(sk); 458 u_other = unix_sk(other); 459 rc = 0; 460 spin_lock(&u_other->peer_wait.lock); 461 462 if (!u->peer_wake.private) { 463 u->peer_wake.private = other; 464 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 465 466 rc = 1; 467 } 468 469 spin_unlock(&u_other->peer_wait.lock); 470 return rc; 471 } 472 473 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 474 struct sock *other) 475 { 476 struct unix_sock *u, *u_other; 477 478 u = unix_sk(sk); 479 u_other = unix_sk(other); 480 spin_lock(&u_other->peer_wait.lock); 481 482 if (u->peer_wake.private == other) { 483 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 484 u->peer_wake.private = NULL; 485 } 486 487 spin_unlock(&u_other->peer_wait.lock); 488 } 489 490 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 491 struct sock *other) 492 { 493 unix_dgram_peer_wake_disconnect(sk, other); 494 wake_up_interruptible_poll(sk_sleep(sk), 495 EPOLLOUT | 496 EPOLLWRNORM | 497 EPOLLWRBAND); 498 } 499 500 /* preconditions: 501 * - unix_peer(sk) == other 502 * - association is stable 503 */ 504 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 505 { 506 int connected; 507 508 connected = unix_dgram_peer_wake_connect(sk, other); 509 510 /* If other is SOCK_DEAD, we want to 
make sure we signal 511 * POLLOUT, such that a subsequent write() can get a 512 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 513 * to other and its full, we will hang waiting for POLLOUT. 514 */ 515 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) 516 return 1; 517 518 if (connected) 519 unix_dgram_peer_wake_disconnect(sk, other); 520 521 return 0; 522 } 523 524 static int unix_writable(const struct sock *sk) 525 { 526 return sk->sk_state != TCP_LISTEN && 527 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 528 } 529 530 static void unix_write_space(struct sock *sk) 531 { 532 struct socket_wq *wq; 533 534 rcu_read_lock(); 535 if (unix_writable(sk)) { 536 wq = rcu_dereference(sk->sk_wq); 537 if (skwq_has_sleeper(wq)) 538 wake_up_interruptible_sync_poll(&wq->wait, 539 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 540 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 541 } 542 rcu_read_unlock(); 543 } 544 545 /* When dgram socket disconnects (or changes its peer), we clear its receive 546 * queue of packets arrived from previous peer. First, it allows to do 547 * flow control based only on wmem_alloc; second, sk connected to peer 548 * may receive messages only from that peer. */ 549 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 550 { 551 if (!skb_queue_empty(&sk->sk_receive_queue)) { 552 skb_queue_purge(&sk->sk_receive_queue); 553 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 554 555 /* If one link of bidirectional dgram pipe is disconnected, 556 * we signal error. Messages are lost. Do not make this, 557 * when peer was not connected to us. 
558 */ 559 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 560 WRITE_ONCE(other->sk_err, ECONNRESET); 561 sk_error_report(other); 562 } 563 } 564 other->sk_state = TCP_CLOSE; 565 } 566 567 static void unix_sock_destructor(struct sock *sk) 568 { 569 struct unix_sock *u = unix_sk(sk); 570 571 skb_queue_purge(&sk->sk_receive_queue); 572 573 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); 574 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 575 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); 576 if (!sock_flag(sk, SOCK_DEAD)) { 577 pr_info("Attempt to release alive unix socket: %p\n", sk); 578 return; 579 } 580 581 if (u->addr) 582 unix_release_addr(u->addr); 583 584 atomic_long_dec(&unix_nr_socks); 585 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 586 #ifdef UNIX_REFCNT_DEBUG 587 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 588 atomic_long_read(&unix_nr_socks)); 589 #endif 590 } 591 592 static void unix_release_sock(struct sock *sk, int embrion) 593 { 594 struct unix_sock *u = unix_sk(sk); 595 struct sock *skpair; 596 struct sk_buff *skb; 597 struct path path; 598 int state; 599 600 unix_remove_socket(sock_net(sk), sk); 601 unix_remove_bsd_socket(sk); 602 603 /* Clear state */ 604 unix_state_lock(sk); 605 sock_orphan(sk); 606 sk->sk_shutdown = SHUTDOWN_MASK; 607 path = u->path; 608 u->path.dentry = NULL; 609 u->path.mnt = NULL; 610 state = sk->sk_state; 611 sk->sk_state = TCP_CLOSE; 612 613 skpair = unix_peer(sk); 614 unix_peer(sk) = NULL; 615 616 unix_state_unlock(sk); 617 618 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 619 if (u->oob_skb) { 620 kfree_skb(u->oob_skb); 621 u->oob_skb = NULL; 622 } 623 #endif 624 625 wake_up_interruptible_all(&u->peer_wait); 626 627 if (skpair != NULL) { 628 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 629 unix_state_lock(skpair); 630 /* No more writes */ 631 skpair->sk_shutdown = SHUTDOWN_MASK; 632 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 633 WRITE_ONCE(skpair->sk_err, 
ECONNRESET); 634 unix_state_unlock(skpair); 635 skpair->sk_state_change(skpair); 636 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 637 } 638 639 unix_dgram_peer_wake_disconnect(sk, skpair); 640 sock_put(skpair); /* It may now die */ 641 } 642 643 /* Try to flush out this socket. Throw out buffers at least */ 644 645 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 646 if (state == TCP_LISTEN) 647 unix_release_sock(skb->sk, 1); 648 /* passed fds are erased in the kfree_skb hook */ 649 UNIXCB(skb).consumed = skb->len; 650 kfree_skb(skb); 651 } 652 653 if (path.dentry) 654 path_put(&path); 655 656 sock_put(sk); 657 658 /* ---- Socket is dead now and most probably destroyed ---- */ 659 660 /* 661 * Fixme: BSD difference: In BSD all sockets connected to us get 662 * ECONNRESET and we die on the spot. In Linux we behave 663 * like files and pipes do and wait for the last 664 * dereference. 665 * 666 * Can't we simply set sock->err? 667 * 668 * What the above comment does talk about? 
--ANK(980817) 669 */ 670 671 if (unix_tot_inflight) 672 unix_gc(); /* Garbage collect fds */ 673 } 674 675 static void init_peercred(struct sock *sk) 676 { 677 const struct cred *old_cred; 678 struct pid *old_pid; 679 680 spin_lock(&sk->sk_peer_lock); 681 old_pid = sk->sk_peer_pid; 682 old_cred = sk->sk_peer_cred; 683 sk->sk_peer_pid = get_pid(task_tgid(current)); 684 sk->sk_peer_cred = get_current_cred(); 685 spin_unlock(&sk->sk_peer_lock); 686 687 put_pid(old_pid); 688 put_cred(old_cred); 689 } 690 691 static void copy_peercred(struct sock *sk, struct sock *peersk) 692 { 693 const struct cred *old_cred; 694 struct pid *old_pid; 695 696 if (sk < peersk) { 697 spin_lock(&sk->sk_peer_lock); 698 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 699 } else { 700 spin_lock(&peersk->sk_peer_lock); 701 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 702 } 703 old_pid = sk->sk_peer_pid; 704 old_cred = sk->sk_peer_cred; 705 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 706 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 707 708 spin_unlock(&sk->sk_peer_lock); 709 spin_unlock(&peersk->sk_peer_lock); 710 711 put_pid(old_pid); 712 put_cred(old_cred); 713 } 714 715 static int unix_listen(struct socket *sock, int backlog) 716 { 717 int err; 718 struct sock *sk = sock->sk; 719 struct unix_sock *u = unix_sk(sk); 720 721 err = -EOPNOTSUPP; 722 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 723 goto out; /* Only stream/seqpacket sockets accept */ 724 err = -EINVAL; 725 if (!u->addr) 726 goto out; /* No listens on an unbound socket */ 727 unix_state_lock(sk); 728 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 729 goto out_unlock; 730 if (backlog > sk->sk_max_ack_backlog) 731 wake_up_interruptible_all(&u->peer_wait); 732 sk->sk_max_ack_backlog = backlog; 733 sk->sk_state = TCP_LISTEN; 734 /* set credentials so connect can copy them */ 735 init_peercred(sk); 736 err = 0; 737 738 out_unlock: 739 unix_state_unlock(sk); 740 out: 
741 return err; 742 } 743 744 static int unix_release(struct socket *); 745 static int unix_bind(struct socket *, struct sockaddr *, int); 746 static int unix_stream_connect(struct socket *, struct sockaddr *, 747 int addr_len, int flags); 748 static int unix_socketpair(struct socket *, struct socket *); 749 static int unix_accept(struct socket *, struct socket *, int, bool); 750 static int unix_getname(struct socket *, struct sockaddr *, int); 751 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 752 static __poll_t unix_dgram_poll(struct file *, struct socket *, 753 poll_table *); 754 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 755 #ifdef CONFIG_COMPAT 756 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 757 #endif 758 static int unix_shutdown(struct socket *, int); 759 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 760 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 761 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 762 size_t size, int flags); 763 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 764 struct pipe_inode_info *, size_t size, 765 unsigned int flags); 766 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 767 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 768 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 769 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 770 static int unix_dgram_connect(struct socket *, struct sockaddr *, 771 int, int); 772 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 773 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 774 int); 775 776 static int unix_set_peek_off(struct sock *sk, int val) 777 { 778 struct unix_sock *u = unix_sk(sk); 779 780 if 
(mutex_lock_interruptible(&u->iolock)) 781 return -EINTR; 782 783 sk->sk_peek_off = val; 784 mutex_unlock(&u->iolock); 785 786 return 0; 787 } 788 789 #ifdef CONFIG_PROC_FS 790 static int unix_count_nr_fds(struct sock *sk) 791 { 792 struct sk_buff *skb; 793 struct unix_sock *u; 794 int nr_fds = 0; 795 796 spin_lock(&sk->sk_receive_queue.lock); 797 skb = skb_peek(&sk->sk_receive_queue); 798 while (skb) { 799 u = unix_sk(skb->sk); 800 nr_fds += atomic_read(&u->scm_stat.nr_fds); 801 skb = skb_peek_next(skb, &sk->sk_receive_queue); 802 } 803 spin_unlock(&sk->sk_receive_queue.lock); 804 805 return nr_fds; 806 } 807 808 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 809 { 810 struct sock *sk = sock->sk; 811 unsigned char s_state; 812 struct unix_sock *u; 813 int nr_fds = 0; 814 815 if (sk) { 816 s_state = READ_ONCE(sk->sk_state); 817 u = unix_sk(sk); 818 819 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their 820 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN. 821 * SOCK_DGRAM is ordinary. So, no lock is needed. 
822 */ 823 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED) 824 nr_fds = atomic_read(&u->scm_stat.nr_fds); 825 else if (s_state == TCP_LISTEN) 826 nr_fds = unix_count_nr_fds(sk); 827 828 seq_printf(m, "scm_fds: %u\n", nr_fds); 829 } 830 } 831 #else 832 #define unix_show_fdinfo NULL 833 #endif 834 835 static const struct proto_ops unix_stream_ops = { 836 .family = PF_UNIX, 837 .owner = THIS_MODULE, 838 .release = unix_release, 839 .bind = unix_bind, 840 .connect = unix_stream_connect, 841 .socketpair = unix_socketpair, 842 .accept = unix_accept, 843 .getname = unix_getname, 844 .poll = unix_poll, 845 .ioctl = unix_ioctl, 846 #ifdef CONFIG_COMPAT 847 .compat_ioctl = unix_compat_ioctl, 848 #endif 849 .listen = unix_listen, 850 .shutdown = unix_shutdown, 851 .sendmsg = unix_stream_sendmsg, 852 .recvmsg = unix_stream_recvmsg, 853 .read_skb = unix_stream_read_skb, 854 .mmap = sock_no_mmap, 855 .sendpage = unix_stream_sendpage, 856 .splice_read = unix_stream_splice_read, 857 .set_peek_off = unix_set_peek_off, 858 .show_fdinfo = unix_show_fdinfo, 859 }; 860 861 static const struct proto_ops unix_dgram_ops = { 862 .family = PF_UNIX, 863 .owner = THIS_MODULE, 864 .release = unix_release, 865 .bind = unix_bind, 866 .connect = unix_dgram_connect, 867 .socketpair = unix_socketpair, 868 .accept = sock_no_accept, 869 .getname = unix_getname, 870 .poll = unix_dgram_poll, 871 .ioctl = unix_ioctl, 872 #ifdef CONFIG_COMPAT 873 .compat_ioctl = unix_compat_ioctl, 874 #endif 875 .listen = sock_no_listen, 876 .shutdown = unix_shutdown, 877 .sendmsg = unix_dgram_sendmsg, 878 .read_skb = unix_read_skb, 879 .recvmsg = unix_dgram_recvmsg, 880 .mmap = sock_no_mmap, 881 .sendpage = sock_no_sendpage, 882 .set_peek_off = unix_set_peek_off, 883 .show_fdinfo = unix_show_fdinfo, 884 }; 885 886 static const struct proto_ops unix_seqpacket_ops = { 887 .family = PF_UNIX, 888 .owner = THIS_MODULE, 889 .release = unix_release, 890 .bind = unix_bind, 891 .connect = unix_stream_connect, 892 
.socketpair = unix_socketpair, 893 .accept = unix_accept, 894 .getname = unix_getname, 895 .poll = unix_dgram_poll, 896 .ioctl = unix_ioctl, 897 #ifdef CONFIG_COMPAT 898 .compat_ioctl = unix_compat_ioctl, 899 #endif 900 .listen = unix_listen, 901 .shutdown = unix_shutdown, 902 .sendmsg = unix_seqpacket_sendmsg, 903 .recvmsg = unix_seqpacket_recvmsg, 904 .mmap = sock_no_mmap, 905 .sendpage = sock_no_sendpage, 906 .set_peek_off = unix_set_peek_off, 907 .show_fdinfo = unix_show_fdinfo, 908 }; 909 910 static void unix_close(struct sock *sk, long timeout) 911 { 912 /* Nothing to do here, unix socket does not need a ->close(). 913 * This is merely for sockmap. 914 */ 915 } 916 917 static void unix_unhash(struct sock *sk) 918 { 919 /* Nothing to do here, unix socket does not need a ->unhash(). 920 * This is merely for sockmap. 921 */ 922 } 923 924 struct proto unix_dgram_proto = { 925 .name = "UNIX", 926 .owner = THIS_MODULE, 927 .obj_size = sizeof(struct unix_sock), 928 .close = unix_close, 929 #ifdef CONFIG_BPF_SYSCALL 930 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 931 #endif 932 }; 933 934 struct proto unix_stream_proto = { 935 .name = "UNIX-STREAM", 936 .owner = THIS_MODULE, 937 .obj_size = sizeof(struct unix_sock), 938 .close = unix_close, 939 .unhash = unix_unhash, 940 #ifdef CONFIG_BPF_SYSCALL 941 .psock_update_sk_prot = unix_stream_bpf_update_proto, 942 #endif 943 }; 944 945 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 946 { 947 struct unix_sock *u; 948 struct sock *sk; 949 int err; 950 951 atomic_long_inc(&unix_nr_socks); 952 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 953 err = -ENFILE; 954 goto err; 955 } 956 957 if (type == SOCK_STREAM) 958 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 959 else /*dgram and seqpacket */ 960 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 961 962 if (!sk) { 963 err = -ENOMEM; 964 goto err; 965 } 966 967 
sock_init_data(sock, sk); 968 969 sk->sk_hash = unix_unbound_hash(sk); 970 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 971 sk->sk_write_space = unix_write_space; 972 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 973 sk->sk_destruct = unix_sock_destructor; 974 u = unix_sk(sk); 975 u->path.dentry = NULL; 976 u->path.mnt = NULL; 977 spin_lock_init(&u->lock); 978 atomic_long_set(&u->inflight, 0); 979 INIT_LIST_HEAD(&u->link); 980 mutex_init(&u->iolock); /* single task reading lock */ 981 mutex_init(&u->bindlock); /* single task binding lock */ 982 init_waitqueue_head(&u->peer_wait); 983 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 984 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 985 unix_insert_unbound_socket(net, sk); 986 987 sock_prot_inuse_add(net, sk->sk_prot, 1); 988 989 return sk; 990 991 err: 992 atomic_long_dec(&unix_nr_socks); 993 return ERR_PTR(err); 994 } 995 996 static int unix_create(struct net *net, struct socket *sock, int protocol, 997 int kern) 998 { 999 struct sock *sk; 1000 1001 if (protocol && protocol != PF_UNIX) 1002 return -EPROTONOSUPPORT; 1003 1004 sock->state = SS_UNCONNECTED; 1005 1006 switch (sock->type) { 1007 case SOCK_STREAM: 1008 sock->ops = &unix_stream_ops; 1009 break; 1010 /* 1011 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 1012 * nothing uses it. 
1013 */ 1014 case SOCK_RAW: 1015 sock->type = SOCK_DGRAM; 1016 fallthrough; 1017 case SOCK_DGRAM: 1018 sock->ops = &unix_dgram_ops; 1019 break; 1020 case SOCK_SEQPACKET: 1021 sock->ops = &unix_seqpacket_ops; 1022 break; 1023 default: 1024 return -ESOCKTNOSUPPORT; 1025 } 1026 1027 sk = unix_create1(net, sock, kern, sock->type); 1028 if (IS_ERR(sk)) 1029 return PTR_ERR(sk); 1030 1031 return 0; 1032 } 1033 1034 static int unix_release(struct socket *sock) 1035 { 1036 struct sock *sk = sock->sk; 1037 1038 if (!sk) 1039 return 0; 1040 1041 sk->sk_prot->close(sk, 0); 1042 unix_release_sock(sk, 0); 1043 sock->sk = NULL; 1044 1045 return 0; 1046 } 1047 1048 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1049 int type) 1050 { 1051 struct inode *inode; 1052 struct path path; 1053 struct sock *sk; 1054 int err; 1055 1056 unix_mkname_bsd(sunaddr, addr_len); 1057 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1058 if (err) 1059 goto fail; 1060 1061 err = path_permission(&path, MAY_WRITE); 1062 if (err) 1063 goto path_put; 1064 1065 err = -ECONNREFUSED; 1066 inode = d_backing_inode(path.dentry); 1067 if (!S_ISSOCK(inode->i_mode)) 1068 goto path_put; 1069 1070 sk = unix_find_socket_byinode(inode); 1071 if (!sk) 1072 goto path_put; 1073 1074 err = -EPROTOTYPE; 1075 if (sk->sk_type == type) 1076 touch_atime(&path); 1077 else 1078 goto sock_put; 1079 1080 path_put(&path); 1081 1082 return sk; 1083 1084 sock_put: 1085 sock_put(sk); 1086 path_put: 1087 path_put(&path); 1088 fail: 1089 return ERR_PTR(err); 1090 } 1091 1092 static struct sock *unix_find_abstract(struct net *net, 1093 struct sockaddr_un *sunaddr, 1094 int addr_len, int type) 1095 { 1096 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1097 struct dentry *dentry; 1098 struct sock *sk; 1099 1100 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1101 if (!sk) 1102 return ERR_PTR(-ECONNREFUSED); 1103 1104 dentry = unix_sk(sk)->path.dentry; 1105 if (dentry) 
1106 touch_atime(&unix_sk(sk)->path); 1107 1108 return sk; 1109 } 1110 1111 static struct sock *unix_find_other(struct net *net, 1112 struct sockaddr_un *sunaddr, 1113 int addr_len, int type) 1114 { 1115 struct sock *sk; 1116 1117 if (sunaddr->sun_path[0]) 1118 sk = unix_find_bsd(sunaddr, addr_len, type); 1119 else 1120 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1121 1122 return sk; 1123 } 1124 1125 static int unix_autobind(struct sock *sk) 1126 { 1127 unsigned int new_hash, old_hash = sk->sk_hash; 1128 struct unix_sock *u = unix_sk(sk); 1129 struct net *net = sock_net(sk); 1130 struct unix_address *addr; 1131 u32 lastnum, ordernum; 1132 int err; 1133 1134 err = mutex_lock_interruptible(&u->bindlock); 1135 if (err) 1136 return err; 1137 1138 if (u->addr) 1139 goto out; 1140 1141 err = -ENOMEM; 1142 addr = kzalloc(sizeof(*addr) + 1143 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1144 if (!addr) 1145 goto out; 1146 1147 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1148 addr->name->sun_family = AF_UNIX; 1149 refcount_set(&addr->refcnt, 1); 1150 1151 ordernum = get_random_u32(); 1152 lastnum = ordernum & 0xFFFFF; 1153 retry: 1154 ordernum = (ordernum + 1) & 0xFFFFF; 1155 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1156 1157 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1158 unix_table_double_lock(net, old_hash, new_hash); 1159 1160 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1161 unix_table_double_unlock(net, old_hash, new_hash); 1162 1163 /* __unix_find_socket_byname() may take long time if many names 1164 * are already in use. 1165 */ 1166 cond_resched(); 1167 1168 if (ordernum == lastnum) { 1169 /* Give up if all names seems to be in use. 
*/ 1170 err = -ENOSPC; 1171 unix_release_addr(addr); 1172 goto out; 1173 } 1174 1175 goto retry; 1176 } 1177 1178 __unix_set_addr_hash(net, sk, addr, new_hash); 1179 unix_table_double_unlock(net, old_hash, new_hash); 1180 err = 0; 1181 1182 out: mutex_unlock(&u->bindlock); 1183 return err; 1184 } 1185 1186 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1187 int addr_len) 1188 { 1189 umode_t mode = S_IFSOCK | 1190 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1191 unsigned int new_hash, old_hash = sk->sk_hash; 1192 struct unix_sock *u = unix_sk(sk); 1193 struct net *net = sock_net(sk); 1194 struct mnt_idmap *idmap; 1195 struct unix_address *addr; 1196 struct dentry *dentry; 1197 struct path parent; 1198 int err; 1199 1200 unix_mkname_bsd(sunaddr, addr_len); 1201 addr_len = strlen(sunaddr->sun_path) + 1202 offsetof(struct sockaddr_un, sun_path) + 1; 1203 1204 addr = unix_create_addr(sunaddr, addr_len); 1205 if (!addr) 1206 return -ENOMEM; 1207 1208 /* 1209 * Get the parent directory, calculate the hash for last 1210 * component. 1211 */ 1212 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1213 if (IS_ERR(dentry)) { 1214 err = PTR_ERR(dentry); 1215 goto out; 1216 } 1217 1218 /* 1219 * All right, let's create it. 
1220 */ 1221 idmap = mnt_idmap(parent.mnt); 1222 err = security_path_mknod(&parent, dentry, mode, 0); 1223 if (!err) 1224 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); 1225 if (err) 1226 goto out_path; 1227 err = mutex_lock_interruptible(&u->bindlock); 1228 if (err) 1229 goto out_unlink; 1230 if (u->addr) 1231 goto out_unlock; 1232 1233 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1234 unix_table_double_lock(net, old_hash, new_hash); 1235 u->path.mnt = mntget(parent.mnt); 1236 u->path.dentry = dget(dentry); 1237 __unix_set_addr_hash(net, sk, addr, new_hash); 1238 unix_table_double_unlock(net, old_hash, new_hash); 1239 unix_insert_bsd_socket(sk); 1240 mutex_unlock(&u->bindlock); 1241 done_path_create(&parent, dentry); 1242 return 0; 1243 1244 out_unlock: 1245 mutex_unlock(&u->bindlock); 1246 err = -EINVAL; 1247 out_unlink: 1248 /* failed after successful mknod? unlink what we'd created... */ 1249 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL); 1250 out_path: 1251 done_path_create(&parent, dentry); 1252 out: 1253 unix_release_addr(addr); 1254 return err == -EEXIST ? 
-EADDRINUSE : err; 1255 } 1256 1257 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1258 int addr_len) 1259 { 1260 unsigned int new_hash, old_hash = sk->sk_hash; 1261 struct unix_sock *u = unix_sk(sk); 1262 struct net *net = sock_net(sk); 1263 struct unix_address *addr; 1264 int err; 1265 1266 addr = unix_create_addr(sunaddr, addr_len); 1267 if (!addr) 1268 return -ENOMEM; 1269 1270 err = mutex_lock_interruptible(&u->bindlock); 1271 if (err) 1272 goto out; 1273 1274 if (u->addr) { 1275 err = -EINVAL; 1276 goto out_mutex; 1277 } 1278 1279 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1280 unix_table_double_lock(net, old_hash, new_hash); 1281 1282 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1283 goto out_spin; 1284 1285 __unix_set_addr_hash(net, sk, addr, new_hash); 1286 unix_table_double_unlock(net, old_hash, new_hash); 1287 mutex_unlock(&u->bindlock); 1288 return 0; 1289 1290 out_spin: 1291 unix_table_double_unlock(net, old_hash, new_hash); 1292 err = -EADDRINUSE; 1293 out_mutex: 1294 mutex_unlock(&u->bindlock); 1295 out: 1296 unix_release_addr(addr); 1297 return err; 1298 } 1299 1300 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1301 { 1302 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1303 struct sock *sk = sock->sk; 1304 int err; 1305 1306 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1307 sunaddr->sun_family == AF_UNIX) 1308 return unix_autobind(sk); 1309 1310 err = unix_validate_addr(sunaddr, addr_len); 1311 if (err) 1312 return err; 1313 1314 if (sunaddr->sun_path[0]) 1315 err = unix_bind_bsd(sk, sunaddr, addr_len); 1316 else 1317 err = unix_bind_abstract(sk, sunaddr, addr_len); 1318 1319 return err; 1320 } 1321 1322 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1323 { 1324 if (unlikely(sk1 == sk2) || !sk2) { 1325 unix_state_lock(sk1); 1326 return; 1327 } 1328 if (sk1 < sk2) { 1329 unix_state_lock(sk1); 
1330 unix_state_lock_nested(sk2); 1331 } else { 1332 unix_state_lock(sk2); 1333 unix_state_lock_nested(sk1); 1334 } 1335 } 1336 1337 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1338 { 1339 if (unlikely(sk1 == sk2) || !sk2) { 1340 unix_state_unlock(sk1); 1341 return; 1342 } 1343 unix_state_unlock(sk1); 1344 unix_state_unlock(sk2); 1345 } 1346 1347 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1348 int alen, int flags) 1349 { 1350 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1351 struct sock *sk = sock->sk; 1352 struct sock *other; 1353 int err; 1354 1355 err = -EINVAL; 1356 if (alen < offsetofend(struct sockaddr, sa_family)) 1357 goto out; 1358 1359 if (addr->sa_family != AF_UNSPEC) { 1360 err = unix_validate_addr(sunaddr, alen); 1361 if (err) 1362 goto out; 1363 1364 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1365 !unix_sk(sk)->addr) { 1366 err = unix_autobind(sk); 1367 if (err) 1368 goto out; 1369 } 1370 1371 restart: 1372 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1373 if (IS_ERR(other)) { 1374 err = PTR_ERR(other); 1375 goto out; 1376 } 1377 1378 unix_state_double_lock(sk, other); 1379 1380 /* Apparently VFS overslept socket death. Retry. */ 1381 if (sock_flag(other, SOCK_DEAD)) { 1382 unix_state_double_unlock(sk, other); 1383 sock_put(other); 1384 goto restart; 1385 } 1386 1387 err = -EPERM; 1388 if (!unix_may_send(sk, other)) 1389 goto out_unlock; 1390 1391 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1392 if (err) 1393 goto out_unlock; 1394 1395 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1396 } else { 1397 /* 1398 * 1003.1g breaking connected state with AF_UNSPEC 1399 */ 1400 other = NULL; 1401 unix_state_double_lock(sk, other); 1402 } 1403 1404 /* 1405 * If it was connected, reconnect. 
1406 */ 1407 if (unix_peer(sk)) { 1408 struct sock *old_peer = unix_peer(sk); 1409 1410 unix_peer(sk) = other; 1411 if (!other) 1412 sk->sk_state = TCP_CLOSE; 1413 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1414 1415 unix_state_double_unlock(sk, other); 1416 1417 if (other != old_peer) 1418 unix_dgram_disconnected(sk, old_peer); 1419 sock_put(old_peer); 1420 } else { 1421 unix_peer(sk) = other; 1422 unix_state_double_unlock(sk, other); 1423 } 1424 1425 return 0; 1426 1427 out_unlock: 1428 unix_state_double_unlock(sk, other); 1429 sock_put(other); 1430 out: 1431 return err; 1432 } 1433 1434 static long unix_wait_for_peer(struct sock *other, long timeo) 1435 __releases(&unix_sk(other)->lock) 1436 { 1437 struct unix_sock *u = unix_sk(other); 1438 int sched; 1439 DEFINE_WAIT(wait); 1440 1441 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1442 1443 sched = !sock_flag(other, SOCK_DEAD) && 1444 !(other->sk_shutdown & RCV_SHUTDOWN) && 1445 unix_recvq_full(other); 1446 1447 unix_state_unlock(other); 1448 1449 if (sched) 1450 timeo = schedule_timeout(timeo); 1451 1452 finish_wait(&u->peer_wait, &wait); 1453 return timeo; 1454 } 1455 1456 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1457 int addr_len, int flags) 1458 { 1459 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1460 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1461 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1462 struct net *net = sock_net(sk); 1463 struct sk_buff *skb = NULL; 1464 long timeo; 1465 int err; 1466 int st; 1467 1468 err = unix_validate_addr(sunaddr, addr_len); 1469 if (err) 1470 goto out; 1471 1472 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1473 err = unix_autobind(sk); 1474 if (err) 1475 goto out; 1476 } 1477 1478 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1479 1480 /* First of all allocate resources. 
1481 If we will make it after state is locked, 1482 we will have to recheck all again in any case. 1483 */ 1484 1485 /* create new sock for complete connection */ 1486 newsk = unix_create1(net, NULL, 0, sock->type); 1487 if (IS_ERR(newsk)) { 1488 err = PTR_ERR(newsk); 1489 newsk = NULL; 1490 goto out; 1491 } 1492 1493 err = -ENOMEM; 1494 1495 /* Allocate skb for sending to listening sock */ 1496 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1497 if (skb == NULL) 1498 goto out; 1499 1500 restart: 1501 /* Find listening sock. */ 1502 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1503 if (IS_ERR(other)) { 1504 err = PTR_ERR(other); 1505 other = NULL; 1506 goto out; 1507 } 1508 1509 /* Latch state of peer */ 1510 unix_state_lock(other); 1511 1512 /* Apparently VFS overslept socket death. Retry. */ 1513 if (sock_flag(other, SOCK_DEAD)) { 1514 unix_state_unlock(other); 1515 sock_put(other); 1516 goto restart; 1517 } 1518 1519 err = -ECONNREFUSED; 1520 if (other->sk_state != TCP_LISTEN) 1521 goto out_unlock; 1522 if (other->sk_shutdown & RCV_SHUTDOWN) 1523 goto out_unlock; 1524 1525 if (unix_recvq_full(other)) { 1526 err = -EAGAIN; 1527 if (!timeo) 1528 goto out_unlock; 1529 1530 timeo = unix_wait_for_peer(other, timeo); 1531 1532 err = sock_intr_errno(timeo); 1533 if (signal_pending(current)) 1534 goto out; 1535 sock_put(other); 1536 goto restart; 1537 } 1538 1539 /* Latch our state. 1540 1541 It is tricky place. We need to grab our state lock and cannot 1542 drop lock on peer. It is dangerous because deadlock is 1543 possible. Connect to self case and simultaneous 1544 attempt to connect are eliminated by checking socket 1545 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1546 check this before attempt to grab lock. 1547 1548 Well, and we have to recheck the state after socket locked. 1549 */ 1550 st = sk->sk_state; 1551 1552 switch (st) { 1553 case TCP_CLOSE: 1554 /* This is ok... 
continue with connect */ 1555 break; 1556 case TCP_ESTABLISHED: 1557 /* Socket is already connected */ 1558 err = -EISCONN; 1559 goto out_unlock; 1560 default: 1561 err = -EINVAL; 1562 goto out_unlock; 1563 } 1564 1565 unix_state_lock_nested(sk); 1566 1567 if (sk->sk_state != st) { 1568 unix_state_unlock(sk); 1569 unix_state_unlock(other); 1570 sock_put(other); 1571 goto restart; 1572 } 1573 1574 err = security_unix_stream_connect(sk, other, newsk); 1575 if (err) { 1576 unix_state_unlock(sk); 1577 goto out_unlock; 1578 } 1579 1580 /* The way is open! Fastly set all the necessary fields... */ 1581 1582 sock_hold(sk); 1583 unix_peer(newsk) = sk; 1584 newsk->sk_state = TCP_ESTABLISHED; 1585 newsk->sk_type = sk->sk_type; 1586 init_peercred(newsk); 1587 newu = unix_sk(newsk); 1588 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1589 otheru = unix_sk(other); 1590 1591 /* copy address information from listening to new sock 1592 * 1593 * The contents of *(otheru->addr) and otheru->path 1594 * are seen fully set up here, since we have found 1595 * otheru in hash under its lock. Insertion into the 1596 * hash chain we'd found it in had been done in an 1597 * earlier critical area protected by the chain's lock, 1598 * the same one where we'd set *(otheru->addr) contents, 1599 * as well as otheru->path and otheru->addr itself. 1600 * 1601 * Using smp_store_release() here to set newu->addr 1602 * is enough to make those stores, as well as stores 1603 * to newu->path visible to anyone who gets newu->addr 1604 * by smp_load_acquire(). IOW, the same warranties 1605 * as for unix_sock instances bound in unix_bind() or 1606 * in unix_autobind(). 
1607 */ 1608 if (otheru->path.dentry) { 1609 path_get(&otheru->path); 1610 newu->path = otheru->path; 1611 } 1612 refcount_inc(&otheru->addr->refcnt); 1613 smp_store_release(&newu->addr, otheru->addr); 1614 1615 /* Set credentials */ 1616 copy_peercred(sk, other); 1617 1618 sock->state = SS_CONNECTED; 1619 sk->sk_state = TCP_ESTABLISHED; 1620 sock_hold(newsk); 1621 1622 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1623 unix_peer(sk) = newsk; 1624 1625 unix_state_unlock(sk); 1626 1627 /* take ten and send info to listening sock */ 1628 spin_lock(&other->sk_receive_queue.lock); 1629 __skb_queue_tail(&other->sk_receive_queue, skb); 1630 spin_unlock(&other->sk_receive_queue.lock); 1631 unix_state_unlock(other); 1632 other->sk_data_ready(other); 1633 sock_put(other); 1634 return 0; 1635 1636 out_unlock: 1637 if (other) 1638 unix_state_unlock(other); 1639 1640 out: 1641 kfree_skb(skb); 1642 if (newsk) 1643 unix_release_sock(newsk, 0); 1644 if (other) 1645 sock_put(other); 1646 return err; 1647 } 1648 1649 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1650 { 1651 struct sock *ska = socka->sk, *skb = sockb->sk; 1652 1653 /* Join our sockets back to back */ 1654 sock_hold(ska); 1655 sock_hold(skb); 1656 unix_peer(ska) = skb; 1657 unix_peer(skb) = ska; 1658 init_peercred(ska); 1659 init_peercred(skb); 1660 1661 ska->sk_state = TCP_ESTABLISHED; 1662 skb->sk_state = TCP_ESTABLISHED; 1663 socka->state = SS_CONNECTED; 1664 sockb->state = SS_CONNECTED; 1665 return 0; 1666 } 1667 1668 static void unix_sock_inherit_flags(const struct socket *old, 1669 struct socket *new) 1670 { 1671 if (test_bit(SOCK_PASSCRED, &old->flags)) 1672 set_bit(SOCK_PASSCRED, &new->flags); 1673 if (test_bit(SOCK_PASSSEC, &old->flags)) 1674 set_bit(SOCK_PASSSEC, &new->flags); 1675 } 1676 1677 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1678 bool kern) 1679 { 1680 struct sock *sk = sock->sk; 1681 struct sock *tsk; 1682 struct 
sk_buff *skb; 1683 int err; 1684 1685 err = -EOPNOTSUPP; 1686 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1687 goto out; 1688 1689 err = -EINVAL; 1690 if (sk->sk_state != TCP_LISTEN) 1691 goto out; 1692 1693 /* If socket state is TCP_LISTEN it cannot change (for now...), 1694 * so that no locks are necessary. 1695 */ 1696 1697 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1698 &err); 1699 if (!skb) { 1700 /* This means receive shutdown. */ 1701 if (err == 0) 1702 err = -EINVAL; 1703 goto out; 1704 } 1705 1706 tsk = skb->sk; 1707 skb_free_datagram(sk, skb); 1708 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1709 1710 /* attach accepted sock to socket */ 1711 unix_state_lock(tsk); 1712 newsock->state = SS_CONNECTED; 1713 unix_sock_inherit_flags(sock, newsock); 1714 sock_graft(tsk, newsock); 1715 unix_state_unlock(tsk); 1716 return 0; 1717 1718 out: 1719 return err; 1720 } 1721 1722 1723 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1724 { 1725 struct sock *sk = sock->sk; 1726 struct unix_address *addr; 1727 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1728 int err = 0; 1729 1730 if (peer) { 1731 sk = unix_peer_get(sk); 1732 1733 err = -ENOTCONN; 1734 if (!sk) 1735 goto out; 1736 err = 0; 1737 } else { 1738 sock_hold(sk); 1739 } 1740 1741 addr = smp_load_acquire(&unix_sk(sk)->addr); 1742 if (!addr) { 1743 sunaddr->sun_family = AF_UNIX; 1744 sunaddr->sun_path[0] = 0; 1745 err = offsetof(struct sockaddr_un, sun_path); 1746 } else { 1747 err = addr->len; 1748 memcpy(sunaddr, addr->name, addr->len); 1749 } 1750 sock_put(sk); 1751 out: 1752 return err; 1753 } 1754 1755 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1756 { 1757 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1758 1759 /* 1760 * Garbage collection of unix sockets starts by selecting a set of 1761 * candidate sockets which have reference only from being in flight 1762 * (total_refs == inflight_refs). 
This condition is checked once during 1763 * the candidate collection phase, and candidates are marked as such, so 1764 * that non-candidates can later be ignored. While inflight_refs is 1765 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1766 * is an instantaneous decision. 1767 * 1768 * Once a candidate, however, the socket must not be reinstalled into a 1769 * file descriptor while the garbage collection is in progress. 1770 * 1771 * If the above conditions are met, then the directed graph of 1772 * candidates (*) does not change while unix_gc_lock is held. 1773 * 1774 * Any operations that changes the file count through file descriptors 1775 * (dup, close, sendmsg) does not change the graph since candidates are 1776 * not installed in fds. 1777 * 1778 * Dequeing a candidate via recvmsg would install it into an fd, but 1779 * that takes unix_gc_lock to decrement the inflight count, so it's 1780 * serialized with garbage collection. 1781 * 1782 * MSG_PEEK is special in that it does not change the inflight count, 1783 * yet does install the socket into an fd. The following lock/unlock 1784 * pair is to ensure serialization with garbage collection. It must be 1785 * done between incrementing the file count and installing the file into 1786 * an fd. 1787 * 1788 * If garbage collection starts after the barrier provided by the 1789 * lock/unlock, then it will see the elevated refcount and not mark this 1790 * as a candidate. If a garbage collection is already in progress 1791 * before the file count was incremented, then the lock/unlock pair will 1792 * ensure that garbage collection is finished before progressing to 1793 * installing the fd. 1794 * 1795 * (*) A -> B where B is on the queue of A or B is on the queue of C 1796 * which is on the queue of listening socket A. 
1797 */ 1798 spin_lock(&unix_gc_lock); 1799 spin_unlock(&unix_gc_lock); 1800 } 1801 1802 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1803 { 1804 int err = 0; 1805 1806 UNIXCB(skb).pid = get_pid(scm->pid); 1807 UNIXCB(skb).uid = scm->creds.uid; 1808 UNIXCB(skb).gid = scm->creds.gid; 1809 UNIXCB(skb).fp = NULL; 1810 unix_get_secdata(scm, skb); 1811 if (scm->fp && send_fds) 1812 err = unix_attach_fds(scm, skb); 1813 1814 skb->destructor = unix_destruct_scm; 1815 return err; 1816 } 1817 1818 static bool unix_passcred_enabled(const struct socket *sock, 1819 const struct sock *other) 1820 { 1821 return test_bit(SOCK_PASSCRED, &sock->flags) || 1822 !other->sk_socket || 1823 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1824 } 1825 1826 /* 1827 * Some apps rely on write() giving SCM_CREDENTIALS 1828 * We include credentials if source or destination socket 1829 * asserted SOCK_PASSCRED. 1830 */ 1831 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1832 const struct sock *other) 1833 { 1834 if (UNIXCB(skb).pid) 1835 return; 1836 if (unix_passcred_enabled(sock, other)) { 1837 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1838 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1839 } 1840 } 1841 1842 static int maybe_init_creds(struct scm_cookie *scm, 1843 struct socket *socket, 1844 const struct sock *other) 1845 { 1846 int err; 1847 struct msghdr msg = { .msg_controllen = 0 }; 1848 1849 err = scm_send(socket, &msg, scm, false); 1850 if (err) 1851 return err; 1852 1853 if (unix_passcred_enabled(socket, other)) { 1854 scm->pid = get_pid(task_tgid(current)); 1855 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1856 } 1857 return err; 1858 } 1859 1860 static bool unix_skb_scm_eq(struct sk_buff *skb, 1861 struct scm_cookie *scm) 1862 { 1863 return UNIXCB(skb).pid == scm->pid && 1864 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1865 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1866 unix_secdata_eq(scm, 
skb); 1867 } 1868 1869 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1870 { 1871 struct scm_fp_list *fp = UNIXCB(skb).fp; 1872 struct unix_sock *u = unix_sk(sk); 1873 1874 if (unlikely(fp && fp->count)) 1875 atomic_add(fp->count, &u->scm_stat.nr_fds); 1876 } 1877 1878 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1879 { 1880 struct scm_fp_list *fp = UNIXCB(skb).fp; 1881 struct unix_sock *u = unix_sk(sk); 1882 1883 if (unlikely(fp && fp->count)) 1884 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1885 } 1886 1887 /* 1888 * Send AF_UNIX data. 1889 */ 1890 1891 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1892 size_t len) 1893 { 1894 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1895 struct sock *sk = sock->sk, *other = NULL; 1896 struct unix_sock *u = unix_sk(sk); 1897 struct scm_cookie scm; 1898 struct sk_buff *skb; 1899 int data_len = 0; 1900 int sk_locked; 1901 long timeo; 1902 int err; 1903 1904 wait_for_unix_gc(); 1905 err = scm_send(sock, msg, &scm, false); 1906 if (err < 0) 1907 return err; 1908 1909 err = -EOPNOTSUPP; 1910 if (msg->msg_flags&MSG_OOB) 1911 goto out; 1912 1913 if (msg->msg_namelen) { 1914 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1915 if (err) 1916 goto out; 1917 } else { 1918 sunaddr = NULL; 1919 err = -ENOTCONN; 1920 other = unix_peer_get(sk); 1921 if (!other) 1922 goto out; 1923 } 1924 1925 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1926 err = unix_autobind(sk); 1927 if (err) 1928 goto out; 1929 } 1930 1931 err = -EMSGSIZE; 1932 if (len > sk->sk_sndbuf - 32) 1933 goto out; 1934 1935 if (len > SKB_MAX_ALLOC) { 1936 data_len = min_t(size_t, 1937 len - SKB_MAX_ALLOC, 1938 MAX_SKB_FRAGS * PAGE_SIZE); 1939 data_len = PAGE_ALIGN(data_len); 1940 1941 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1942 } 1943 1944 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1945 msg->msg_flags & MSG_DONTWAIT, &err, 1946 PAGE_ALLOC_COSTLY_ORDER); 1947 if (skb == 
NULL) 1948 goto out; 1949 1950 err = unix_scm_to_skb(&scm, skb, true); 1951 if (err < 0) 1952 goto out_free; 1953 1954 skb_put(skb, len - data_len); 1955 skb->data_len = data_len; 1956 skb->len = len; 1957 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1958 if (err) 1959 goto out_free; 1960 1961 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1962 1963 restart: 1964 if (!other) { 1965 err = -ECONNRESET; 1966 if (sunaddr == NULL) 1967 goto out_free; 1968 1969 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1970 sk->sk_type); 1971 if (IS_ERR(other)) { 1972 err = PTR_ERR(other); 1973 other = NULL; 1974 goto out_free; 1975 } 1976 } 1977 1978 if (sk_filter(other, skb) < 0) { 1979 /* Toss the packet but do not return any error to the sender */ 1980 err = len; 1981 goto out_free; 1982 } 1983 1984 sk_locked = 0; 1985 unix_state_lock(other); 1986 restart_locked: 1987 err = -EPERM; 1988 if (!unix_may_send(sk, other)) 1989 goto out_unlock; 1990 1991 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1992 /* 1993 * Check with 1003.1g - what should 1994 * datagram error 1995 */ 1996 unix_state_unlock(other); 1997 sock_put(other); 1998 1999 if (!sk_locked) 2000 unix_state_lock(sk); 2001 2002 err = 0; 2003 if (sk->sk_type == SOCK_SEQPACKET) { 2004 /* We are here only when racing with unix_release_sock() 2005 * is clearing @other. Never change state to TCP_CLOSE 2006 * unlike SOCK_DGRAM wants. 
2007 */ 2008 unix_state_unlock(sk); 2009 err = -EPIPE; 2010 } else if (unix_peer(sk) == other) { 2011 unix_peer(sk) = NULL; 2012 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2013 2014 sk->sk_state = TCP_CLOSE; 2015 unix_state_unlock(sk); 2016 2017 unix_dgram_disconnected(sk, other); 2018 sock_put(other); 2019 err = -ECONNREFUSED; 2020 } else { 2021 unix_state_unlock(sk); 2022 } 2023 2024 other = NULL; 2025 if (err) 2026 goto out_free; 2027 goto restart; 2028 } 2029 2030 err = -EPIPE; 2031 if (other->sk_shutdown & RCV_SHUTDOWN) 2032 goto out_unlock; 2033 2034 if (sk->sk_type != SOCK_SEQPACKET) { 2035 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2036 if (err) 2037 goto out_unlock; 2038 } 2039 2040 /* other == sk && unix_peer(other) != sk if 2041 * - unix_peer(sk) == NULL, destination address bound to sk 2042 * - unix_peer(sk) == sk by time of get but disconnected before lock 2043 */ 2044 if (other != sk && 2045 unlikely(unix_peer(other) != sk && 2046 unix_recvq_full_lockless(other))) { 2047 if (timeo) { 2048 timeo = unix_wait_for_peer(other, timeo); 2049 2050 err = sock_intr_errno(timeo); 2051 if (signal_pending(current)) 2052 goto out_free; 2053 2054 goto restart; 2055 } 2056 2057 if (!sk_locked) { 2058 unix_state_unlock(other); 2059 unix_state_double_lock(sk, other); 2060 } 2061 2062 if (unix_peer(sk) != other || 2063 unix_dgram_peer_wake_me(sk, other)) { 2064 err = -EAGAIN; 2065 sk_locked = 1; 2066 goto out_unlock; 2067 } 2068 2069 if (!sk_locked) { 2070 sk_locked = 1; 2071 goto restart_locked; 2072 } 2073 } 2074 2075 if (unlikely(sk_locked)) 2076 unix_state_unlock(sk); 2077 2078 if (sock_flag(other, SOCK_RCVTSTAMP)) 2079 __net_timestamp(skb); 2080 maybe_add_creds(skb, sock, other); 2081 scm_stat_add(other, skb); 2082 skb_queue_tail(&other->sk_receive_queue, skb); 2083 unix_state_unlock(other); 2084 other->sk_data_ready(other); 2085 sock_put(other); 2086 scm_destroy(&scm); 2087 return len; 2088 2089 out_unlock: 2090 if (sk_locked) 2091 
unix_state_unlock(sk); 2092 unix_state_unlock(other); 2093 out_free: 2094 kfree_skb(skb); 2095 out: 2096 if (other) 2097 sock_put(other); 2098 scm_destroy(&scm); 2099 return err; 2100 } 2101 2102 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2103 * bytes, and a minimum of a full page. 2104 */ 2105 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2106 2107 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2108 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, 2109 struct scm_cookie *scm, bool fds_sent) 2110 { 2111 struct unix_sock *ousk = unix_sk(other); 2112 struct sk_buff *skb; 2113 int err = 0; 2114 2115 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2116 2117 if (!skb) 2118 return err; 2119 2120 err = unix_scm_to_skb(scm, skb, !fds_sent); 2121 if (err < 0) { 2122 kfree_skb(skb); 2123 return err; 2124 } 2125 skb_put(skb, 1); 2126 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2127 2128 if (err) { 2129 kfree_skb(skb); 2130 return err; 2131 } 2132 2133 unix_state_lock(other); 2134 2135 if (sock_flag(other, SOCK_DEAD) || 2136 (other->sk_shutdown & RCV_SHUTDOWN)) { 2137 unix_state_unlock(other); 2138 kfree_skb(skb); 2139 return -EPIPE; 2140 } 2141 2142 maybe_add_creds(skb, sock, other); 2143 skb_get(skb); 2144 2145 if (ousk->oob_skb) 2146 consume_skb(ousk->oob_skb); 2147 2148 WRITE_ONCE(ousk->oob_skb, skb); 2149 2150 scm_stat_add(other, skb); 2151 skb_queue_tail(&other->sk_receive_queue, skb); 2152 sk_send_sigurg(other); 2153 unix_state_unlock(other); 2154 other->sk_data_ready(other); 2155 2156 return err; 2157 } 2158 #endif 2159 2160 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2161 size_t len) 2162 { 2163 struct sock *sk = sock->sk; 2164 struct sock *other = NULL; 2165 int err, size; 2166 struct sk_buff *skb; 2167 int sent = 0; 2168 struct scm_cookie scm; 2169 bool fds_sent = false; 2170 int data_len; 2171 2172 wait_for_unix_gc(); 2173 err = 
scm_send(sock, msg, &scm, false); 2174 if (err < 0) 2175 return err; 2176 2177 err = -EOPNOTSUPP; 2178 if (msg->msg_flags & MSG_OOB) { 2179 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2180 if (len) 2181 len--; 2182 else 2183 #endif 2184 goto out_err; 2185 } 2186 2187 if (msg->msg_namelen) { 2188 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 2189 goto out_err; 2190 } else { 2191 err = -ENOTCONN; 2192 other = unix_peer(sk); 2193 if (!other) 2194 goto out_err; 2195 } 2196 2197 if (sk->sk_shutdown & SEND_SHUTDOWN) 2198 goto pipe_err; 2199 2200 while (sent < len) { 2201 size = len - sent; 2202 2203 /* Keep two messages in the pipe so it schedules better */ 2204 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2205 2206 /* allow fallback to order-0 allocations */ 2207 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2208 2209 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2210 2211 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2212 2213 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2214 msg->msg_flags & MSG_DONTWAIT, &err, 2215 get_order(UNIX_SKB_FRAGS_SZ)); 2216 if (!skb) 2217 goto out_err; 2218 2219 /* Only send the fds in the first buffer */ 2220 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2221 if (err < 0) { 2222 kfree_skb(skb); 2223 goto out_err; 2224 } 2225 fds_sent = true; 2226 2227 skb_put(skb, size - data_len); 2228 skb->data_len = data_len; 2229 skb->len = size; 2230 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2231 if (err) { 2232 kfree_skb(skb); 2233 goto out_err; 2234 } 2235 2236 unix_state_lock(other); 2237 2238 if (sock_flag(other, SOCK_DEAD) || 2239 (other->sk_shutdown & RCV_SHUTDOWN)) 2240 goto pipe_err_free; 2241 2242 maybe_add_creds(skb, sock, other); 2243 scm_stat_add(other, skb); 2244 skb_queue_tail(&other->sk_receive_queue, skb); 2245 unix_state_unlock(other); 2246 other->sk_data_ready(other); 2247 sent += size; 2248 } 2249 2250 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2251 if 
(msg->msg_flags & MSG_OOB) { 2252 err = queue_oob(sock, msg, other, &scm, fds_sent); 2253 if (err) 2254 goto out_err; 2255 sent++; 2256 } 2257 #endif 2258 2259 scm_destroy(&scm); 2260 2261 return sent; 2262 2263 pipe_err_free: 2264 unix_state_unlock(other); 2265 kfree_skb(skb); 2266 pipe_err: 2267 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2268 send_sig(SIGPIPE, current, 0); 2269 err = -EPIPE; 2270 out_err: 2271 scm_destroy(&scm); 2272 return sent ? : err; 2273 } 2274 2275 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2276 int offset, size_t size, int flags) 2277 { 2278 int err; 2279 bool send_sigpipe = false; 2280 bool init_scm = true; 2281 struct scm_cookie scm; 2282 struct sock *other, *sk = socket->sk; 2283 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2284 2285 if (flags & MSG_OOB) 2286 return -EOPNOTSUPP; 2287 2288 other = unix_peer(sk); 2289 if (!other || sk->sk_state != TCP_ESTABLISHED) 2290 return -ENOTCONN; 2291 2292 if (false) { 2293 alloc_skb: 2294 unix_state_unlock(other); 2295 mutex_unlock(&unix_sk(other)->iolock); 2296 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2297 &err, 0); 2298 if (!newskb) 2299 goto err; 2300 } 2301 2302 /* we must acquire iolock as we modify already present 2303 * skbs in the sk_receive_queue and mess with skb->len 2304 */ 2305 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2306 if (err) { 2307 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2308 goto err; 2309 } 2310 2311 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2312 err = -EPIPE; 2313 send_sigpipe = true; 2314 goto err_unlock; 2315 } 2316 2317 unix_state_lock(other); 2318 2319 if (sock_flag(other, SOCK_DEAD) || 2320 other->sk_shutdown & RCV_SHUTDOWN) { 2321 err = -EPIPE; 2322 send_sigpipe = true; 2323 goto err_state_unlock; 2324 } 2325 2326 if (init_scm) { 2327 err = maybe_init_creds(&scm, socket, other); 2328 if (err) 2329 goto err_state_unlock; 2330 init_scm = false; 2331 } 2332 2333 skb = skb_peek_tail(&other->sk_receive_queue); 2334 if (tail && tail == skb) { 2335 skb = newskb; 2336 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2337 if (newskb) { 2338 skb = newskb; 2339 } else { 2340 tail = skb; 2341 goto alloc_skb; 2342 } 2343 } else if (newskb) { 2344 /* this is fast path, we don't necessarily need to 2345 * call to kfree_skb even though with newskb == NULL 2346 * this - does no harm 2347 */ 2348 consume_skb(newskb); 2349 newskb = NULL; 2350 } 2351 2352 if (skb_append_pagefrags(skb, page, offset, size)) { 2353 tail = skb; 2354 goto alloc_skb; 2355 } 2356 2357 skb->len += size; 2358 skb->data_len += size; 2359 skb->truesize += size; 2360 refcount_add(size, &sk->sk_wmem_alloc); 2361 2362 if (newskb) { 2363 err = unix_scm_to_skb(&scm, skb, false); 2364 if (err) 2365 goto err_state_unlock; 2366 spin_lock(&other->sk_receive_queue.lock); 2367 __skb_queue_tail(&other->sk_receive_queue, newskb); 2368 spin_unlock(&other->sk_receive_queue.lock); 2369 } 2370 2371 unix_state_unlock(other); 2372 mutex_unlock(&unix_sk(other)->iolock); 2373 2374 other->sk_data_ready(other); 2375 scm_destroy(&scm); 2376 return size; 2377 2378 err_state_unlock: 2379 unix_state_unlock(other); 2380 err_unlock: 2381 mutex_unlock(&unix_sk(other)->iolock); 2382 err: 2383 kfree_skb(newskb); 2384 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2385 send_sig(SIGPIPE, current, 0); 2386 if (!init_scm) 2387 scm_destroy(&scm); 2388 return err; 2389 } 2390 2391 static 
int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2392 size_t len) 2393 { 2394 int err; 2395 struct sock *sk = sock->sk; 2396 2397 err = sock_error(sk); 2398 if (err) 2399 return err; 2400 2401 if (sk->sk_state != TCP_ESTABLISHED) 2402 return -ENOTCONN; 2403 2404 if (msg->msg_namelen) 2405 msg->msg_namelen = 0; 2406 2407 return unix_dgram_sendmsg(sock, msg, len); 2408 } 2409 2410 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2411 size_t size, int flags) 2412 { 2413 struct sock *sk = sock->sk; 2414 2415 if (sk->sk_state != TCP_ESTABLISHED) 2416 return -ENOTCONN; 2417 2418 return unix_dgram_recvmsg(sock, msg, size, flags); 2419 } 2420 2421 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2422 { 2423 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2424 2425 if (addr) { 2426 msg->msg_namelen = addr->len; 2427 memcpy(msg->msg_name, addr->name, addr->len); 2428 } 2429 } 2430 2431 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2432 int flags) 2433 { 2434 struct scm_cookie scm; 2435 struct socket *sock = sk->sk_socket; 2436 struct unix_sock *u = unix_sk(sk); 2437 struct sk_buff *skb, *last; 2438 long timeo; 2439 int skip; 2440 int err; 2441 2442 err = -EOPNOTSUPP; 2443 if (flags&MSG_OOB) 2444 goto out; 2445 2446 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2447 2448 do { 2449 mutex_lock(&u->iolock); 2450 2451 skip = sk_peek_offset(sk, flags); 2452 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2453 &skip, &err, &last); 2454 if (skb) { 2455 if (!(flags & MSG_PEEK)) 2456 scm_stat_del(sk, skb); 2457 break; 2458 } 2459 2460 mutex_unlock(&u->iolock); 2461 2462 if (err != -EAGAIN) 2463 break; 2464 } while (timeo && 2465 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2466 &err, &timeo, last)); 2467 2468 if (!skb) { /* implies iolock unlocked */ 2469 unix_state_lock(sk); 2470 /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
*/ 2471 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2472 (sk->sk_shutdown & RCV_SHUTDOWN)) 2473 err = 0; 2474 unix_state_unlock(sk); 2475 goto out; 2476 } 2477 2478 if (wq_has_sleeper(&u->peer_wait)) 2479 wake_up_interruptible_sync_poll(&u->peer_wait, 2480 EPOLLOUT | EPOLLWRNORM | 2481 EPOLLWRBAND); 2482 2483 if (msg->msg_name) 2484 unix_copy_addr(msg, skb->sk); 2485 2486 if (size > skb->len - skip) 2487 size = skb->len - skip; 2488 else if (size < skb->len - skip) 2489 msg->msg_flags |= MSG_TRUNC; 2490 2491 err = skb_copy_datagram_msg(skb, skip, msg, size); 2492 if (err) 2493 goto out_free; 2494 2495 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2496 __sock_recv_timestamp(msg, sk, skb); 2497 2498 memset(&scm, 0, sizeof(scm)); 2499 2500 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2501 unix_set_secdata(&scm, skb); 2502 2503 if (!(flags & MSG_PEEK)) { 2504 if (UNIXCB(skb).fp) 2505 unix_detach_fds(&scm, skb); 2506 2507 sk_peek_offset_bwd(sk, skb->len); 2508 } else { 2509 /* It is questionable: on PEEK we could: 2510 - do not return fds - good, but too simple 8) 2511 - return fds, and do not return them on read (old strategy, 2512 apparently wrong) 2513 - clone fds (I chose it for now, it is the most universal 2514 solution) 2515 2516 POSIX 1003.1g does not actually define this clearly 2517 at all. POSIX 1003.1g doesn't define a lot of things 2518 clearly however! 2519 2520 */ 2521 2522 sk_peek_offset_fwd(sk, size); 2523 2524 if (UNIXCB(skb).fp) 2525 unix_peek_fds(&scm, skb); 2526 } 2527 err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; 2528 2529 scm_recv(sock, msg, &scm, flags); 2530 2531 out_free: 2532 skb_free_datagram(sk, skb); 2533 mutex_unlock(&u->iolock); 2534 out: 2535 return err; 2536 } 2537 2538 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2539 int flags) 2540 { 2541 struct sock *sk = sock->sk; 2542 2543 #ifdef CONFIG_BPF_SYSCALL 2544 const struct proto *prot = READ_ONCE(sk->sk_prot); 2545 2546 if (prot != &unix_dgram_proto) 2547 return prot->recvmsg(sk, msg, size, flags, NULL); 2548 #endif 2549 return __unix_dgram_recvmsg(sk, msg, size, flags); 2550 } 2551 2552 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2553 { 2554 struct unix_sock *u = unix_sk(sk); 2555 struct sk_buff *skb; 2556 int err, copied; 2557 2558 mutex_lock(&u->iolock); 2559 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2560 mutex_unlock(&u->iolock); 2561 if (!skb) 2562 return err; 2563 2564 copied = recv_actor(sk, skb); 2565 kfree_skb(skb); 2566 2567 return copied; 2568 } 2569 2570 /* 2571 * Sleep until more data has arrived. But check for races.. 
2572 */ 2573 static long unix_stream_data_wait(struct sock *sk, long timeo, 2574 struct sk_buff *last, unsigned int last_len, 2575 bool freezable) 2576 { 2577 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; 2578 struct sk_buff *tail; 2579 DEFINE_WAIT(wait); 2580 2581 unix_state_lock(sk); 2582 2583 for (;;) { 2584 prepare_to_wait(sk_sleep(sk), &wait, state); 2585 2586 tail = skb_peek_tail(&sk->sk_receive_queue); 2587 if (tail != last || 2588 (tail && tail->len != last_len) || 2589 sk->sk_err || 2590 (sk->sk_shutdown & RCV_SHUTDOWN) || 2591 signal_pending(current) || 2592 !timeo) 2593 break; 2594 2595 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2596 unix_state_unlock(sk); 2597 timeo = schedule_timeout(timeo); 2598 unix_state_lock(sk); 2599 2600 if (sock_flag(sk, SOCK_DEAD)) 2601 break; 2602 2603 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2604 } 2605 2606 finish_wait(sk_sleep(sk), &wait); 2607 unix_state_unlock(sk); 2608 return timeo; 2609 } 2610 2611 static unsigned int unix_skb_len(const struct sk_buff *skb) 2612 { 2613 return skb->len - UNIXCB(skb).consumed; 2614 } 2615 2616 struct unix_stream_read_state { 2617 int (*recv_actor)(struct sk_buff *, int, int, 2618 struct unix_stream_read_state *); 2619 struct socket *socket; 2620 struct msghdr *msg; 2621 struct pipe_inode_info *pipe; 2622 size_t size; 2623 int flags; 2624 unsigned int splice_flags; 2625 }; 2626 2627 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2628 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2629 { 2630 struct socket *sock = state->socket; 2631 struct sock *sk = sock->sk; 2632 struct unix_sock *u = unix_sk(sk); 2633 int chunk = 1; 2634 struct sk_buff *oob_skb; 2635 2636 mutex_lock(&u->iolock); 2637 unix_state_lock(sk); 2638 2639 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2640 unix_state_unlock(sk); 2641 mutex_unlock(&u->iolock); 2642 return -EINVAL; 2643 } 2644 2645 oob_skb = u->oob_skb; 2646 2647 if (!(state->flags & MSG_PEEK)) 2648 WRITE_ONCE(u->oob_skb, 
NULL); 2649 2650 unix_state_unlock(sk); 2651 2652 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2653 2654 if (!(state->flags & MSG_PEEK)) { 2655 UNIXCB(oob_skb).consumed += 1; 2656 kfree_skb(oob_skb); 2657 } 2658 2659 mutex_unlock(&u->iolock); 2660 2661 if (chunk < 0) 2662 return -EFAULT; 2663 2664 state->msg->msg_flags |= MSG_OOB; 2665 return 1; 2666 } 2667 2668 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2669 int flags, int copied) 2670 { 2671 struct unix_sock *u = unix_sk(sk); 2672 2673 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2674 skb_unlink(skb, &sk->sk_receive_queue); 2675 consume_skb(skb); 2676 skb = NULL; 2677 } else { 2678 if (skb == u->oob_skb) { 2679 if (copied) { 2680 skb = NULL; 2681 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2682 if (!(flags & MSG_PEEK)) { 2683 WRITE_ONCE(u->oob_skb, NULL); 2684 consume_skb(skb); 2685 } 2686 } else if (!(flags & MSG_PEEK)) { 2687 skb_unlink(skb, &sk->sk_receive_queue); 2688 consume_skb(skb); 2689 skb = skb_peek(&sk->sk_receive_queue); 2690 } 2691 } 2692 } 2693 return skb; 2694 } 2695 #endif 2696 2697 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2698 { 2699 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2700 return -ENOTCONN; 2701 2702 return unix_read_skb(sk, recv_actor); 2703 } 2704 2705 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2706 bool freezable) 2707 { 2708 struct scm_cookie scm; 2709 struct socket *sock = state->socket; 2710 struct sock *sk = sock->sk; 2711 struct unix_sock *u = unix_sk(sk); 2712 int copied = 0; 2713 int flags = state->flags; 2714 int noblock = flags & MSG_DONTWAIT; 2715 bool check_creds = false; 2716 int target; 2717 int err = 0; 2718 long timeo; 2719 int skip; 2720 size_t size = state->size; 2721 unsigned int last_len; 2722 2723 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2724 err = -EINVAL; 2725 goto out; 2726 } 2727 2728 if (unlikely(flags & MSG_OOB)) { 2729 err = -EOPNOTSUPP; 
2730 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2731 err = unix_stream_recv_urg(state); 2732 #endif 2733 goto out; 2734 } 2735 2736 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2737 timeo = sock_rcvtimeo(sk, noblock); 2738 2739 memset(&scm, 0, sizeof(scm)); 2740 2741 /* Lock the socket to prevent queue disordering 2742 * while sleeps in memcpy_tomsg 2743 */ 2744 mutex_lock(&u->iolock); 2745 2746 skip = max(sk_peek_offset(sk, flags), 0); 2747 2748 do { 2749 int chunk; 2750 bool drop_skb; 2751 struct sk_buff *skb, *last; 2752 2753 redo: 2754 unix_state_lock(sk); 2755 if (sock_flag(sk, SOCK_DEAD)) { 2756 err = -ECONNRESET; 2757 goto unlock; 2758 } 2759 last = skb = skb_peek(&sk->sk_receive_queue); 2760 last_len = last ? last->len : 0; 2761 2762 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2763 if (skb) { 2764 skb = manage_oob(skb, sk, flags, copied); 2765 if (!skb) { 2766 unix_state_unlock(sk); 2767 if (copied) 2768 break; 2769 goto redo; 2770 } 2771 } 2772 #endif 2773 again: 2774 if (skb == NULL) { 2775 if (copied >= target) 2776 goto unlock; 2777 2778 /* 2779 * POSIX 1003.1g mandates this order. 
2780 */ 2781 2782 err = sock_error(sk); 2783 if (err) 2784 goto unlock; 2785 if (sk->sk_shutdown & RCV_SHUTDOWN) 2786 goto unlock; 2787 2788 unix_state_unlock(sk); 2789 if (!timeo) { 2790 err = -EAGAIN; 2791 break; 2792 } 2793 2794 mutex_unlock(&u->iolock); 2795 2796 timeo = unix_stream_data_wait(sk, timeo, last, 2797 last_len, freezable); 2798 2799 if (signal_pending(current)) { 2800 err = sock_intr_errno(timeo); 2801 scm_destroy(&scm); 2802 goto out; 2803 } 2804 2805 mutex_lock(&u->iolock); 2806 goto redo; 2807 unlock: 2808 unix_state_unlock(sk); 2809 break; 2810 } 2811 2812 while (skip >= unix_skb_len(skb)) { 2813 skip -= unix_skb_len(skb); 2814 last = skb; 2815 last_len = skb->len; 2816 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2817 if (!skb) 2818 goto again; 2819 } 2820 2821 unix_state_unlock(sk); 2822 2823 if (check_creds) { 2824 /* Never glue messages from different writers */ 2825 if (!unix_skb_scm_eq(skb, &scm)) 2826 break; 2827 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2828 /* Copy credentials */ 2829 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2830 unix_set_secdata(&scm, skb); 2831 check_creds = true; 2832 } 2833 2834 /* Copy address just once */ 2835 if (state->msg && state->msg->msg_name) { 2836 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2837 state->msg->msg_name); 2838 unix_copy_addr(state->msg, skb->sk); 2839 sunaddr = NULL; 2840 } 2841 2842 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2843 skb_get(skb); 2844 chunk = state->recv_actor(skb, skip, chunk, state); 2845 drop_skb = !unix_skb_len(skb); 2846 /* skb is only safe to use if !drop_skb */ 2847 consume_skb(skb); 2848 if (chunk < 0) { 2849 if (copied == 0) 2850 copied = -EFAULT; 2851 break; 2852 } 2853 copied += chunk; 2854 size -= chunk; 2855 2856 if (drop_skb) { 2857 /* the skb was touched by a concurrent reader; 2858 * we should not expect anything from this skb 2859 * anymore and assume it invalid - we can be 2860 * sure it was 
dropped from the socket queue 2861 * 2862 * let's report a short read 2863 */ 2864 err = 0; 2865 break; 2866 } 2867 2868 /* Mark read part of skb as used */ 2869 if (!(flags & MSG_PEEK)) { 2870 UNIXCB(skb).consumed += chunk; 2871 2872 sk_peek_offset_bwd(sk, chunk); 2873 2874 if (UNIXCB(skb).fp) { 2875 scm_stat_del(sk, skb); 2876 unix_detach_fds(&scm, skb); 2877 } 2878 2879 if (unix_skb_len(skb)) 2880 break; 2881 2882 skb_unlink(skb, &sk->sk_receive_queue); 2883 consume_skb(skb); 2884 2885 if (scm.fp) 2886 break; 2887 } else { 2888 /* It is questionable, see note in unix_dgram_recvmsg. 2889 */ 2890 if (UNIXCB(skb).fp) 2891 unix_peek_fds(&scm, skb); 2892 2893 sk_peek_offset_fwd(sk, chunk); 2894 2895 if (UNIXCB(skb).fp) 2896 break; 2897 2898 skip = 0; 2899 last = skb; 2900 last_len = skb->len; 2901 unix_state_lock(sk); 2902 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2903 if (skb) 2904 goto again; 2905 unix_state_unlock(sk); 2906 break; 2907 } 2908 } while (size); 2909 2910 mutex_unlock(&u->iolock); 2911 if (state->msg) 2912 scm_recv(sock, state->msg, &scm, flags); 2913 else 2914 scm_destroy(&scm); 2915 out: 2916 return copied ? 
: err; 2917 } 2918 2919 static int unix_stream_read_actor(struct sk_buff *skb, 2920 int skip, int chunk, 2921 struct unix_stream_read_state *state) 2922 { 2923 int ret; 2924 2925 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2926 state->msg, chunk); 2927 return ret ?: chunk; 2928 } 2929 2930 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2931 size_t size, int flags) 2932 { 2933 struct unix_stream_read_state state = { 2934 .recv_actor = unix_stream_read_actor, 2935 .socket = sk->sk_socket, 2936 .msg = msg, 2937 .size = size, 2938 .flags = flags 2939 }; 2940 2941 return unix_stream_read_generic(&state, true); 2942 } 2943 2944 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2945 size_t size, int flags) 2946 { 2947 struct unix_stream_read_state state = { 2948 .recv_actor = unix_stream_read_actor, 2949 .socket = sock, 2950 .msg = msg, 2951 .size = size, 2952 .flags = flags 2953 }; 2954 2955 #ifdef CONFIG_BPF_SYSCALL 2956 struct sock *sk = sock->sk; 2957 const struct proto *prot = READ_ONCE(sk->sk_prot); 2958 2959 if (prot != &unix_stream_proto) 2960 return prot->recvmsg(sk, msg, size, flags, NULL); 2961 #endif 2962 return unix_stream_read_generic(&state, true); 2963 } 2964 2965 static int unix_stream_splice_actor(struct sk_buff *skb, 2966 int skip, int chunk, 2967 struct unix_stream_read_state *state) 2968 { 2969 return skb_splice_bits(skb, state->socket->sk, 2970 UNIXCB(skb).consumed + skip, 2971 state->pipe, chunk, state->splice_flags); 2972 } 2973 2974 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2975 struct pipe_inode_info *pipe, 2976 size_t size, unsigned int flags) 2977 { 2978 struct unix_stream_read_state state = { 2979 .recv_actor = unix_stream_splice_actor, 2980 .socket = sock, 2981 .pipe = pipe, 2982 .size = size, 2983 .splice_flags = flags, 2984 }; 2985 2986 if (unlikely(*ppos)) 2987 return -ESPIPE; 2988 2989 if (sock->file->f_flags & O_NONBLOCK || 2990 flags & 
SPLICE_F_NONBLOCK) 2991 state.flags = MSG_DONTWAIT; 2992 2993 return unix_stream_read_generic(&state, false); 2994 } 2995 2996 static int unix_shutdown(struct socket *sock, int mode) 2997 { 2998 struct sock *sk = sock->sk; 2999 struct sock *other; 3000 3001 if (mode < SHUT_RD || mode > SHUT_RDWR) 3002 return -EINVAL; 3003 /* This maps: 3004 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 3005 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 3006 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 3007 */ 3008 ++mode; 3009 3010 unix_state_lock(sk); 3011 sk->sk_shutdown |= mode; 3012 other = unix_peer(sk); 3013 if (other) 3014 sock_hold(other); 3015 unix_state_unlock(sk); 3016 sk->sk_state_change(sk); 3017 3018 if (other && 3019 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 3020 3021 int peer_mode = 0; 3022 const struct proto *prot = READ_ONCE(other->sk_prot); 3023 3024 if (prot->unhash) 3025 prot->unhash(other); 3026 if (mode&RCV_SHUTDOWN) 3027 peer_mode |= SEND_SHUTDOWN; 3028 if (mode&SEND_SHUTDOWN) 3029 peer_mode |= RCV_SHUTDOWN; 3030 unix_state_lock(other); 3031 other->sk_shutdown |= peer_mode; 3032 unix_state_unlock(other); 3033 other->sk_state_change(other); 3034 if (peer_mode == SHUTDOWN_MASK) 3035 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 3036 else if (peer_mode & RCV_SHUTDOWN) 3037 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 3038 } 3039 if (other) 3040 sock_put(other); 3041 3042 return 0; 3043 } 3044 3045 long unix_inq_len(struct sock *sk) 3046 { 3047 struct sk_buff *skb; 3048 long amount = 0; 3049 3050 if (sk->sk_state == TCP_LISTEN) 3051 return -EINVAL; 3052 3053 spin_lock(&sk->sk_receive_queue.lock); 3054 if (sk->sk_type == SOCK_STREAM || 3055 sk->sk_type == SOCK_SEQPACKET) { 3056 skb_queue_walk(&sk->sk_receive_queue, skb) 3057 amount += unix_skb_len(skb); 3058 } else { 3059 skb = skb_peek(&sk->sk_receive_queue); 3060 if (skb) 3061 amount = skb->len; 3062 } 3063 spin_unlock(&sk->sk_receive_queue.lock); 3064 3065 return amount; 3066 } 3067 
EXPORT_SYMBOL_GPL(unix_inq_len); 3068 3069 long unix_outq_len(struct sock *sk) 3070 { 3071 return sk_wmem_alloc_get(sk); 3072 } 3073 EXPORT_SYMBOL_GPL(unix_outq_len); 3074 3075 static int unix_open_file(struct sock *sk) 3076 { 3077 struct path path; 3078 struct file *f; 3079 int fd; 3080 3081 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3082 return -EPERM; 3083 3084 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3085 return -ENOENT; 3086 3087 path = unix_sk(sk)->path; 3088 if (!path.dentry) 3089 return -ENOENT; 3090 3091 path_get(&path); 3092 3093 fd = get_unused_fd_flags(O_CLOEXEC); 3094 if (fd < 0) 3095 goto out; 3096 3097 f = dentry_open(&path, O_PATH, current_cred()); 3098 if (IS_ERR(f)) { 3099 put_unused_fd(fd); 3100 fd = PTR_ERR(f); 3101 goto out; 3102 } 3103 3104 fd_install(fd, f); 3105 out: 3106 path_put(&path); 3107 3108 return fd; 3109 } 3110 3111 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3112 { 3113 struct sock *sk = sock->sk; 3114 long amount = 0; 3115 int err; 3116 3117 switch (cmd) { 3118 case SIOCOUTQ: 3119 amount = unix_outq_len(sk); 3120 err = put_user(amount, (int __user *)arg); 3121 break; 3122 case SIOCINQ: 3123 amount = unix_inq_len(sk); 3124 if (amount < 0) 3125 err = amount; 3126 else 3127 err = put_user(amount, (int __user *)arg); 3128 break; 3129 case SIOCUNIXFILE: 3130 err = unix_open_file(sk); 3131 break; 3132 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3133 case SIOCATMARK: 3134 { 3135 struct sk_buff *skb; 3136 int answ = 0; 3137 3138 skb = skb_peek(&sk->sk_receive_queue); 3139 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3140 answ = 1; 3141 err = put_user(answ, (int __user *)arg); 3142 } 3143 break; 3144 #endif 3145 default: 3146 err = -ENOIOCTLCMD; 3147 break; 3148 } 3149 return err; 3150 } 3151 3152 #ifdef CONFIG_COMPAT 3153 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3154 { 3155 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3156 } 3157 
#endif 3158 3159 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3160 { 3161 struct sock *sk = sock->sk; 3162 __poll_t mask; 3163 3164 sock_poll_wait(file, sock, wait); 3165 mask = 0; 3166 3167 /* exceptional events? */ 3168 if (READ_ONCE(sk->sk_err)) 3169 mask |= EPOLLERR; 3170 if (sk->sk_shutdown == SHUTDOWN_MASK) 3171 mask |= EPOLLHUP; 3172 if (sk->sk_shutdown & RCV_SHUTDOWN) 3173 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3174 3175 /* readable? */ 3176 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3177 mask |= EPOLLIN | EPOLLRDNORM; 3178 if (sk_is_readable(sk)) 3179 mask |= EPOLLIN | EPOLLRDNORM; 3180 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3181 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3182 mask |= EPOLLPRI; 3183 #endif 3184 3185 /* Connection-based need to check for termination and startup */ 3186 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3187 sk->sk_state == TCP_CLOSE) 3188 mask |= EPOLLHUP; 3189 3190 /* 3191 * we set writable also when the other side has shut down the 3192 * connection. This prevents stuck sockets. 3193 */ 3194 if (unix_writable(sk)) 3195 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3196 3197 return mask; 3198 } 3199 3200 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3201 poll_table *wait) 3202 { 3203 struct sock *sk = sock->sk, *other; 3204 unsigned int writable; 3205 __poll_t mask; 3206 3207 sock_poll_wait(file, sock, wait); 3208 mask = 0; 3209 3210 /* exceptional events? */ 3211 if (READ_ONCE(sk->sk_err) || 3212 !skb_queue_empty_lockless(&sk->sk_error_queue)) 3213 mask |= EPOLLERR | 3214 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3215 3216 if (sk->sk_shutdown & RCV_SHUTDOWN) 3217 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3218 if (sk->sk_shutdown == SHUTDOWN_MASK) 3219 mask |= EPOLLHUP; 3220 3221 /* readable? 
*/ 3222 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3223 mask |= EPOLLIN | EPOLLRDNORM; 3224 if (sk_is_readable(sk)) 3225 mask |= EPOLLIN | EPOLLRDNORM; 3226 3227 /* Connection-based need to check for termination and startup */ 3228 if (sk->sk_type == SOCK_SEQPACKET) { 3229 if (sk->sk_state == TCP_CLOSE) 3230 mask |= EPOLLHUP; 3231 /* connection hasn't started yet? */ 3232 if (sk->sk_state == TCP_SYN_SENT) 3233 return mask; 3234 } 3235 3236 /* No write status requested, avoid expensive OUT tests. */ 3237 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3238 return mask; 3239 3240 writable = unix_writable(sk); 3241 if (writable) { 3242 unix_state_lock(sk); 3243 3244 other = unix_peer(sk); 3245 if (other && unix_peer(other) != sk && 3246 unix_recvq_full_lockless(other) && 3247 unix_dgram_peer_wake_me(sk, other)) 3248 writable = 0; 3249 3250 unix_state_unlock(sk); 3251 } 3252 3253 if (writable) 3254 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3255 else 3256 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3257 3258 return mask; 3259 } 3260 3261 #ifdef CONFIG_PROC_FS 3262 3263 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3264 3265 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3266 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3267 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3268 3269 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3270 { 3271 unsigned long offset = get_offset(*pos); 3272 unsigned long bucket = get_bucket(*pos); 3273 unsigned long count = 0; 3274 struct sock *sk; 3275 3276 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3277 sk; sk = sk_next(sk)) { 3278 if (++count == offset) 3279 break; 3280 } 3281 3282 return sk; 3283 } 3284 3285 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3286 { 3287 unsigned long bucket = get_bucket(*pos); 3288 struct net *net = seq_file_net(seq); 3289 struct sock *sk; 3290 3291 while (bucket < 
UNIX_HASH_SIZE) { 3292 spin_lock(&net->unx.table.locks[bucket]); 3293 3294 sk = unix_from_bucket(seq, pos); 3295 if (sk) 3296 return sk; 3297 3298 spin_unlock(&net->unx.table.locks[bucket]); 3299 3300 *pos = set_bucket_offset(++bucket, 1); 3301 } 3302 3303 return NULL; 3304 } 3305 3306 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3307 loff_t *pos) 3308 { 3309 unsigned long bucket = get_bucket(*pos); 3310 3311 sk = sk_next(sk); 3312 if (sk) 3313 return sk; 3314 3315 3316 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3317 3318 *pos = set_bucket_offset(++bucket, 1); 3319 3320 return unix_get_first(seq, pos); 3321 } 3322 3323 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3324 { 3325 if (!*pos) 3326 return SEQ_START_TOKEN; 3327 3328 return unix_get_first(seq, pos); 3329 } 3330 3331 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3332 { 3333 ++*pos; 3334 3335 if (v == SEQ_START_TOKEN) 3336 return unix_get_first(seq, pos); 3337 3338 return unix_get_next(seq, v, pos); 3339 } 3340 3341 static void unix_seq_stop(struct seq_file *seq, void *v) 3342 { 3343 struct sock *sk = v; 3344 3345 if (sk) 3346 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3347 } 3348 3349 static int unix_seq_show(struct seq_file *seq, void *v) 3350 { 3351 3352 if (v == SEQ_START_TOKEN) 3353 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3354 "Inode Path\n"); 3355 else { 3356 struct sock *s = v; 3357 struct unix_sock *u = unix_sk(s); 3358 unix_state_lock(s); 3359 3360 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3361 s, 3362 refcount_read(&s->sk_refcnt), 3363 0, 3364 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3365 s->sk_type, 3366 s->sk_socket ? 3367 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3368 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3369 sock_i_ino(s)); 3370 3371 if (u->addr) { // under a hash table lock here 3372 int i, len; 3373 seq_putc(seq, ' '); 3374 3375 i = 0; 3376 len = u->addr->len - 3377 offsetof(struct sockaddr_un, sun_path); 3378 if (u->addr->name->sun_path[0]) { 3379 len--; 3380 } else { 3381 seq_putc(seq, '@'); 3382 i++; 3383 } 3384 for ( ; i < len; i++) 3385 seq_putc(seq, u->addr->name->sun_path[i] ?: 3386 '@'); 3387 } 3388 unix_state_unlock(s); 3389 seq_putc(seq, '\n'); 3390 } 3391 3392 return 0; 3393 } 3394 3395 static const struct seq_operations unix_seq_ops = { 3396 .start = unix_seq_start, 3397 .next = unix_seq_next, 3398 .stop = unix_seq_stop, 3399 .show = unix_seq_show, 3400 }; 3401 3402 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3403 struct bpf_unix_iter_state { 3404 struct seq_net_private p; 3405 unsigned int cur_sk; 3406 unsigned int end_sk; 3407 unsigned int max_sk; 3408 struct sock **batch; 3409 bool st_bucket_done; 3410 }; 3411 3412 struct bpf_iter__unix { 3413 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3414 __bpf_md_ptr(struct unix_sock *, unix_sk); 3415 uid_t uid __aligned(8); 3416 }; 3417 3418 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3419 struct unix_sock *unix_sk, uid_t uid) 3420 { 3421 struct bpf_iter__unix ctx; 3422 3423 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3424 ctx.meta = meta; 3425 ctx.unix_sk = unix_sk; 3426 ctx.uid = uid; 3427 return bpf_iter_run_prog(prog, &ctx); 3428 } 3429 3430 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3431 3432 { 3433 struct bpf_unix_iter_state *iter = seq->private; 3434 unsigned int expected = 1; 3435 struct sock *sk; 3436 3437 sock_hold(start_sk); 3438 iter->batch[iter->end_sk++] = start_sk; 3439 3440 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3441 if (iter->end_sk < iter->max_sk) { 3442 sock_hold(sk); 3443 iter->batch[iter->end_sk++] = sk; 3444 } 3445 3446 expected++; 3447 } 3448 3449 
spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3450 3451 return expected; 3452 } 3453 3454 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3455 { 3456 while (iter->cur_sk < iter->end_sk) 3457 sock_put(iter->batch[iter->cur_sk++]); 3458 } 3459 3460 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3461 unsigned int new_batch_sz) 3462 { 3463 struct sock **new_batch; 3464 3465 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3466 GFP_USER | __GFP_NOWARN); 3467 if (!new_batch) 3468 return -ENOMEM; 3469 3470 bpf_iter_unix_put_batch(iter); 3471 kvfree(iter->batch); 3472 iter->batch = new_batch; 3473 iter->max_sk = new_batch_sz; 3474 3475 return 0; 3476 } 3477 3478 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3479 loff_t *pos) 3480 { 3481 struct bpf_unix_iter_state *iter = seq->private; 3482 unsigned int expected; 3483 bool resized = false; 3484 struct sock *sk; 3485 3486 if (iter->st_bucket_done) 3487 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3488 3489 again: 3490 /* Get a new batch */ 3491 iter->cur_sk = 0; 3492 iter->end_sk = 0; 3493 3494 sk = unix_get_first(seq, pos); 3495 if (!sk) 3496 return NULL; /* Done */ 3497 3498 expected = bpf_iter_unix_hold_batch(seq, sk); 3499 3500 if (iter->end_sk == expected) { 3501 iter->st_bucket_done = true; 3502 return sk; 3503 } 3504 3505 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3506 resized = true; 3507 goto again; 3508 } 3509 3510 return sk; 3511 } 3512 3513 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3514 { 3515 if (!*pos) 3516 return SEQ_START_TOKEN; 3517 3518 /* bpf iter does not support lseek, so it always 3519 * continue from where it was stop()-ped. 
3520 */ 3521 return bpf_iter_unix_batch(seq, pos); 3522 } 3523 3524 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3525 { 3526 struct bpf_unix_iter_state *iter = seq->private; 3527 struct sock *sk; 3528 3529 /* Whenever seq_next() is called, the iter->cur_sk is 3530 * done with seq_show(), so advance to the next sk in 3531 * the batch. 3532 */ 3533 if (iter->cur_sk < iter->end_sk) 3534 sock_put(iter->batch[iter->cur_sk++]); 3535 3536 ++*pos; 3537 3538 if (iter->cur_sk < iter->end_sk) 3539 sk = iter->batch[iter->cur_sk]; 3540 else 3541 sk = bpf_iter_unix_batch(seq, pos); 3542 3543 return sk; 3544 } 3545 3546 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3547 { 3548 struct bpf_iter_meta meta; 3549 struct bpf_prog *prog; 3550 struct sock *sk = v; 3551 uid_t uid; 3552 bool slow; 3553 int ret; 3554 3555 if (v == SEQ_START_TOKEN) 3556 return 0; 3557 3558 slow = lock_sock_fast(sk); 3559 3560 if (unlikely(sk_unhashed(sk))) { 3561 ret = SEQ_SKIP; 3562 goto unlock; 3563 } 3564 3565 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3566 meta.seq = seq; 3567 prog = bpf_iter_get_info(&meta, false); 3568 ret = unix_prog_seq_show(prog, &meta, v, uid); 3569 unlock: 3570 unlock_sock_fast(sk, slow); 3571 return ret; 3572 } 3573 3574 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3575 { 3576 struct bpf_unix_iter_state *iter = seq->private; 3577 struct bpf_iter_meta meta; 3578 struct bpf_prog *prog; 3579 3580 if (!v) { 3581 meta.seq = seq; 3582 prog = bpf_iter_get_info(&meta, true); 3583 if (prog) 3584 (void)unix_prog_seq_show(prog, &meta, v, 0); 3585 } 3586 3587 if (iter->cur_sk < iter->end_sk) 3588 bpf_iter_unix_put_batch(iter); 3589 } 3590 3591 static const struct seq_operations bpf_iter_unix_seq_ops = { 3592 .start = bpf_iter_unix_seq_start, 3593 .next = bpf_iter_unix_seq_next, 3594 .stop = bpf_iter_unix_seq_stop, 3595 .show = bpf_iter_unix_seq_show, 3596 }; 3597 #endif 3598 #endif 3599 3600 static 
const struct net_proto_family unix_family_ops = { 3601 .family = PF_UNIX, 3602 .create = unix_create, 3603 .owner = THIS_MODULE, 3604 }; 3605 3606 3607 static int __net_init unix_net_init(struct net *net) 3608 { 3609 int i; 3610 3611 net->unx.sysctl_max_dgram_qlen = 10; 3612 if (unix_sysctl_register(net)) 3613 goto out; 3614 3615 #ifdef CONFIG_PROC_FS 3616 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3617 sizeof(struct seq_net_private))) 3618 goto err_sysctl; 3619 #endif 3620 3621 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3622 sizeof(spinlock_t), GFP_KERNEL); 3623 if (!net->unx.table.locks) 3624 goto err_proc; 3625 3626 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3627 sizeof(struct hlist_head), 3628 GFP_KERNEL); 3629 if (!net->unx.table.buckets) 3630 goto free_locks; 3631 3632 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3633 spin_lock_init(&net->unx.table.locks[i]); 3634 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3635 } 3636 3637 return 0; 3638 3639 free_locks: 3640 kvfree(net->unx.table.locks); 3641 err_proc: 3642 #ifdef CONFIG_PROC_FS 3643 remove_proc_entry("unix", net->proc_net); 3644 err_sysctl: 3645 #endif 3646 unix_sysctl_unregister(net); 3647 out: 3648 return -ENOMEM; 3649 } 3650 3651 static void __net_exit unix_net_exit(struct net *net) 3652 { 3653 kvfree(net->unx.table.buckets); 3654 kvfree(net->unx.table.locks); 3655 unix_sysctl_unregister(net); 3656 remove_proc_entry("unix", net->proc_net); 3657 } 3658 3659 static struct pernet_operations unix_net_ops = { 3660 .init = unix_net_init, 3661 .exit = unix_net_exit, 3662 }; 3663 3664 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3665 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3666 struct unix_sock *unix_sk, uid_t uid) 3667 3668 #define INIT_BATCH_SZ 16 3669 3670 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3671 { 3672 struct bpf_unix_iter_state *iter = priv_data; 3673 int err; 3674 3675 
err = bpf_iter_init_seq_net(priv_data, aux); 3676 if (err) 3677 return err; 3678 3679 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3680 if (err) { 3681 bpf_iter_fini_seq_net(priv_data); 3682 return err; 3683 } 3684 3685 return 0; 3686 } 3687 3688 static void bpf_iter_fini_unix(void *priv_data) 3689 { 3690 struct bpf_unix_iter_state *iter = priv_data; 3691 3692 bpf_iter_fini_seq_net(priv_data); 3693 kvfree(iter->batch); 3694 } 3695 3696 static const struct bpf_iter_seq_info unix_seq_info = { 3697 .seq_ops = &bpf_iter_unix_seq_ops, 3698 .init_seq_private = bpf_iter_init_unix, 3699 .fini_seq_private = bpf_iter_fini_unix, 3700 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3701 }; 3702 3703 static const struct bpf_func_proto * 3704 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3705 const struct bpf_prog *prog) 3706 { 3707 switch (func_id) { 3708 case BPF_FUNC_setsockopt: 3709 return &bpf_sk_setsockopt_proto; 3710 case BPF_FUNC_getsockopt: 3711 return &bpf_sk_getsockopt_proto; 3712 default: 3713 return NULL; 3714 } 3715 } 3716 3717 static struct bpf_iter_reg unix_reg_info = { 3718 .target = "unix", 3719 .ctx_arg_info_size = 1, 3720 .ctx_arg_info = { 3721 { offsetof(struct bpf_iter__unix, unix_sk), 3722 PTR_TO_BTF_ID_OR_NULL }, 3723 }, 3724 .get_func_proto = bpf_iter_unix_get_func_proto, 3725 .seq_info = &unix_seq_info, 3726 }; 3727 3728 static void __init bpf_iter_register(void) 3729 { 3730 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3731 if (bpf_iter_reg_target(&unix_reg_info)) 3732 pr_warn("Warning: could not register bpf iterator unix\n"); 3733 } 3734 #endif 3735 3736 static int __init af_unix_init(void) 3737 { 3738 int i, rc = -1; 3739 3740 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3741 3742 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3743 spin_lock_init(&bsd_socket_locks[i]); 3744 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3745 } 3746 3747 rc = 
proto_register(&unix_dgram_proto, 1); 3748 if (rc != 0) { 3749 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3750 goto out; 3751 } 3752 3753 rc = proto_register(&unix_stream_proto, 1); 3754 if (rc != 0) { 3755 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3756 proto_unregister(&unix_dgram_proto); 3757 goto out; 3758 } 3759 3760 sock_register(&unix_family_ops); 3761 register_pernet_subsys(&unix_net_ops); 3762 unix_bpf_build_proto(); 3763 3764 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3765 bpf_iter_register(); 3766 #endif 3767 3768 out: 3769 return rc; 3770 } 3771 3772 static void __exit af_unix_exit(void) 3773 { 3774 sock_unregister(PF_UNIX); 3775 proto_unregister(&unix_dgram_proto); 3776 proto_unregister(&unix_stream_proto); 3777 unregister_pernet_subsys(&unix_net_ops); 3778 } 3779 3780 /* Earlier than device_initcall() so that other drivers invoking 3781 request_module() don't end up in a loop when modprobe tries 3782 to use a UNIX socket. But later than subsys_initcall() because 3783 we depend on stuff initialised there */ 3784 fs_initcall(af_unix_init); 3785 module_exit(af_unix_exit); 3786 3787 MODULE_LICENSE("GPL"); 3788 MODULE_ALIAS_NETPROTO(PF_UNIX); 3789