// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
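/*
 * Illustrative userspace sketch (not part of the kernel build): binding a
 * socket in the abstract namespace described above.  The name is the byte
 * sequence "\0example" (the name "example" is made up for this sketch) and
 * the address length deliberately excludes any trailing NUL.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract_example(void)
 *	{
 *		struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *		socklen_t len;
 *		int fd;
 *
 *		fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *		if (fd < 0)
 *			return -1;
 *
 *		// sun_path[0] == '\0' selects the abstract namespace.
 *		memcpy(addr.sun_path, "\0example", 8);
 *		len = offsetof(struct sockaddr_un, sun_path) + 8;
 *
 *		return bind(fd, (struct sockaddr *)&addr, len);
 *	}
 */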

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline
bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
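/*
 * Illustrative userspace sketch (not part of the kernel build): building a
 * filesystem sockaddr_un whose length passes the validation above.  The
 * helper name and the idea of returning the length are made up for this
 * sketch.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static socklen_t fill_unix_addr(struct sockaddr_un *addr, const char *path)
 *	{
 *		memset(addr, 0, sizeof(*addr));
 *		addr->sun_family = AF_UNIX;
 *		// The path must fit in sun_path including its terminating NUL.
 *		strncpy(addr->sun_path, path, sizeof(addr->sun_path) - 1);
 *		// Length covers sun_family plus the NUL-terminated path.
 *		return offsetof(struct sockaddr_un, sun_path) +
 *		       strlen(addr->sun_path) + 1;
 *	}
 */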
static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.
	 */
	((char *)sunaddr)[addr_len] = 0;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far.
 * This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
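/*
 * Illustrative userspace sketch (not part of the kernel build): a datagram
 * client relying on the peer-wait machinery above.  When the server's
 * receive queue is full, poll() reports POLLOUT again only once the server
 * has drained some datagrams (or has died).  The helper name is made up for
 * this sketch.
 *
 *	#include <poll.h>
 *	#include <sys/socket.h>
 *
 *	static ssize_t send_when_writable(int fd, const void *buf, size_t len)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *		// Sleeps until the peer's receive queue has room; if the
 *		// peer goes away, send() reports the error instead.
 *		if (poll(&pfd, 1, -1) < 0)
 *			return -1;
 *		return send(fd, buf, len, 0);
 *	}
 */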
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is
		 * disconnected, we signal error. Messages are lost.
		 * Do not do this when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}

#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n",
			   atomic_read(&u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

struct proto unix_dgram_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
	.name			= "UNIX-STREAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.unhash			= unix_unhash,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct unix_sock *u;
	struct sock *sk;
	int err;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
		err = -ENFILE;
		goto err;
	}

	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /* dgram and seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk) {
		err = -ENOMEM;
		goto err;
	}

	sock_init_data(sock, sk);

	sk->sk_hash		= unix_unbound_hash(sk);
	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_unbound_socket(net, sk);

	sock_prot_inuse_add(net, sk->sk_prot, 1);

	return sk;

err:
	atomic_long_dec(&unix_nr_socks);
	return ERR_PTR(err);
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}

static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}
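/*
 * Illustrative userspace sketch (not part of the kernel build): observing
 * autobind as implemented below.  An unbound socket gets a kernel-chosen
 * abstract name of the form "\0xxxxx" (five hex digits) when it is bound
 * with only sun_family set, or when SO_PASSCRED is set and it connects or
 * sends while still unbound; getsockname() then reports that name.
 *
 *	#include <stddef.h>
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static void show_autobound_name(int fd)
 *	{
 *		struct sockaddr_un addr;
 *		socklen_t len = sizeof(addr);
 *
 *		if (getsockname(fd, (struct sockaddr *)&addr, &len) == 0 &&
 *		    len > offsetof(struct sockaddr_un, sun_path) &&
 *		    addr.sun_path[0] == '\0')
 *			// Abstract autobind name, shown as e.g. "@00f2a" by ss(8).
 *			printf("autobound to \\0%.5s\n", addr.sun_path + 1);
 *	}
 */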

static int unix_autobind(struct sock *sk)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	ordernum = prandom_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/* __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seem to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct user_namespace *ns; // barf...
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	addr_len = strlen(sunaddr->sun_path) +
		offsetof(struct sockaddr_un, sun_path) + 1;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	ns = mnt_user_ns(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out_path;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod?  unlink what we'd created... */
	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out_path:
	done_path_create(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}

static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
			      int addr_len)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	int err;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out;

	if (u->addr) {
		err = -EINVAL;
		goto out_mutex;
	}

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
		goto out_spin;

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	mutex_unlock(&u->bindlock);
	return 0;

out_spin:
	unix_table_double_unlock(net, old_hash, new_hash);
	err = -EADDRINUSE;
out_mutex:
	mutex_unlock(&u->bindlock);
out:
	unix_release_addr(addr);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
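/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * AF_UNSPEC behaviour handled by unix_dgram_connect() below.  Connecting a
 * datagram socket sets a default destination; connecting again with
 * sa_family == AF_UNSPEC breaks that association (1003.1g).  The helper
 * name is made up for this sketch.
 *
 *	#include <sys/socket.h>
 *
 *	static int dgram_disconnect(int fd)
 *	{
 *		struct sockaddr addr = { .sa_family = AF_UNSPEC };
 *
 *		// After this, send() fails with ENOTCONN until the socket is
 *		// connected again, while sendto() with an address still works.
 *		return connect(fd, &addr, sizeof(addr));
 *	}
 */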

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			sk->sk_state = TCP_CLOSE;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	long timeo;
	int err;
	int st;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock.  Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	ska->sk_state = TCP_ESTABLISHED;
	skb->sk_state = TCP_ESTABLISHED;
	socka->state  = SS_CONNECTED;
	sockb->state  = SS_CONNECTED;
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				&err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = offsetof(struct sockaddr_un, sun_path);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs).  This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored.  While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeuing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd.  The following lock/unlock
	 * pair is to ensure serialization with garbage collection.  It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate.  If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}

/*
 *	Send AF_UNIX data.
 */
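/*
 * Illustrative userspace sketch (not part of the kernel build): passing a
 * file descriptor over an AF_UNIX socket with SCM_RIGHTS, the ancillary
 * data that unix_scm_to_skb()/unix_attach_fds() handle above.  The helper
 * name is made up for this sketch.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	static ssize_t send_fd(int sock, int fd_to_pass)
 *	{
 *		char dummy = 'x';
 *		struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *		char cbuf[CMSG_SPACE(sizeof(int))];
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *		};
 *		struct cmsghdr *cmsg;
 *
 *		memset(cbuf, 0, sizeof(cbuf));
 *		cmsg = CMSG_FIRSTHDR(&msg);
 *		cmsg->cmsg_level = SOL_SOCKET;
 *		cmsg->cmsg_type = SCM_RIGHTS;
 *		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *
 *		return sendmsg(sock, &msg, 0);
 *	}
 */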

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *sk = sock->sk, *other = NULL;
	struct unix_sock *u = unix_sk(sk);
	struct scm_cookie scm;
	struct sk_buff *skb;
	int data_len = 0;
	int sk_locked;
	long timeo;
	int err;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			sk->sk_state = TCP_CLOSE;
			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
unlikely(unix_peer(other) != sk && 2008 unix_recvq_full_lockless(other))) { 2009 if (timeo) { 2010 timeo = unix_wait_for_peer(other, timeo); 2011 2012 err = sock_intr_errno(timeo); 2013 if (signal_pending(current)) 2014 goto out_free; 2015 2016 goto restart; 2017 } 2018 2019 if (!sk_locked) { 2020 unix_state_unlock(other); 2021 unix_state_double_lock(sk, other); 2022 } 2023 2024 if (unix_peer(sk) != other || 2025 unix_dgram_peer_wake_me(sk, other)) { 2026 err = -EAGAIN; 2027 sk_locked = 1; 2028 goto out_unlock; 2029 } 2030 2031 if (!sk_locked) { 2032 sk_locked = 1; 2033 goto restart_locked; 2034 } 2035 } 2036 2037 if (unlikely(sk_locked)) 2038 unix_state_unlock(sk); 2039 2040 if (sock_flag(other, SOCK_RCVTSTAMP)) 2041 __net_timestamp(skb); 2042 maybe_add_creds(skb, sock, other); 2043 scm_stat_add(other, skb); 2044 skb_queue_tail(&other->sk_receive_queue, skb); 2045 unix_state_unlock(other); 2046 other->sk_data_ready(other); 2047 sock_put(other); 2048 scm_destroy(&scm); 2049 return len; 2050 2051 out_unlock: 2052 if (sk_locked) 2053 unix_state_unlock(sk); 2054 unix_state_unlock(other); 2055 out_free: 2056 kfree_skb(skb); 2057 out: 2058 if (other) 2059 sock_put(other); 2060 scm_destroy(&scm); 2061 return err; 2062 } 2063 2064 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2065 * bytes, and a minimum of a full page. 2066 */ 2067 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2068 2069 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2070 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2071 { 2072 struct unix_sock *ousk = unix_sk(other); 2073 struct sk_buff *skb; 2074 int err = 0; 2075 2076 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2077 2078 if (!skb) 2079 return err; 2080 2081 skb_put(skb, 1); 2082 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2083 2084 if (err) { 2085 kfree_skb(skb); 2086 return err; 2087 } 2088 2089 unix_state_lock(other); 2090 2091 if (sock_flag(other, SOCK_DEAD) || 2092 (other->sk_shutdown & RCV_SHUTDOWN)) { 2093 unix_state_unlock(other); 2094 kfree_skb(skb); 2095 return -EPIPE; 2096 } 2097 2098 maybe_add_creds(skb, sock, other); 2099 skb_get(skb); 2100 2101 if (ousk->oob_skb) 2102 consume_skb(ousk->oob_skb); 2103 2104 WRITE_ONCE(ousk->oob_skb, skb); 2105 2106 scm_stat_add(other, skb); 2107 skb_queue_tail(&other->sk_receive_queue, skb); 2108 sk_send_sigurg(other); 2109 unix_state_unlock(other); 2110 other->sk_data_ready(other); 2111 2112 return err; 2113 } 2114 #endif 2115 2116 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2117 size_t len) 2118 { 2119 struct sock *sk = sock->sk; 2120 struct sock *other = NULL; 2121 int err, size; 2122 struct sk_buff *skb; 2123 int sent = 0; 2124 struct scm_cookie scm; 2125 bool fds_sent = false; 2126 int data_len; 2127 2128 wait_for_unix_gc(); 2129 err = scm_send(sock, msg, &scm, false); 2130 if (err < 0) 2131 return err; 2132 2133 err = -EOPNOTSUPP; 2134 if (msg->msg_flags & MSG_OOB) { 2135 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2136 if (len) 2137 len--; 2138 else 2139 #endif 2140 goto out_err; 2141 } 2142 2143 if (msg->msg_namelen) { 2144 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2145 goto out_err; 2146 } else { 2147 err = -ENOTCONN; 2148 other = unix_peer(sk); 2149 if (!other) 2150 goto out_err; 2151 } 2152 2153 if (sk->sk_shutdown & SEND_SHUTDOWN) 2154 goto pipe_err; 2155 2156 while (sent < len) { 2157 size = len - sent; 2158 2159 /* Keep two messages in the pipe so it schedules better */ 2160 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2161 2162 /* allow fallback to order-0 allocations */ 2163 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2164 2165 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2166 2167 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2168 2169 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2170 msg->msg_flags & MSG_DONTWAIT, &err, 2171 get_order(UNIX_SKB_FRAGS_SZ)); 2172 if (!skb) 2173 goto out_err; 2174 2175 /* Only send the fds in the first buffer */ 2176 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2177 if (err < 0) { 2178 kfree_skb(skb); 2179 goto out_err; 2180 } 2181 fds_sent = true; 2182 2183 skb_put(skb, size - data_len); 2184 skb->data_len = data_len; 2185 skb->len = size; 2186 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2187 if (err) { 2188 kfree_skb(skb); 2189 goto out_err; 2190 } 2191 2192 unix_state_lock(other); 2193 2194 if (sock_flag(other, SOCK_DEAD) || 2195 (other->sk_shutdown & RCV_SHUTDOWN)) 2196 goto pipe_err_free; 2197 2198 maybe_add_creds(skb, sock, other); 2199 scm_stat_add(other, skb); 2200 skb_queue_tail(&other->sk_receive_queue, skb); 2201 unix_state_unlock(other); 2202 other->sk_data_ready(other); 2203 sent += size; 2204 } 2205 2206 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2207 if (msg->msg_flags & MSG_OOB) { 2208 err = queue_oob(sock, msg, other); 2209 if (err) 2210 goto out_err; 2211 sent++; 2212 } 2213 #endif 2214 2215 scm_destroy(&scm); 2216 2217 return sent; 2218 2219 pipe_err_free: 2220 unix_state_unlock(other); 2221 kfree_skb(skb); 2222 pipe_err: 2223 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2224 send_sig(SIGPIPE, current, 0); 2225 err = -EPIPE; 2226 out_err: 2227 scm_destroy(&scm); 2228 return sent ? : err; 2229 } 2230 2231 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2232 int offset, size_t size, int flags) 2233 { 2234 int err; 2235 bool send_sigpipe = false; 2236 bool init_scm = true; 2237 struct scm_cookie scm; 2238 struct sock *other, *sk = socket->sk; 2239 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2240 2241 if (flags & MSG_OOB) 2242 return -EOPNOTSUPP; 2243 2244 other = unix_peer(sk); 2245 if (!other || sk->sk_state != TCP_ESTABLISHED) 2246 return -ENOTCONN; 2247 2248 if (false) { 2249 alloc_skb: 2250 unix_state_unlock(other); 2251 mutex_unlock(&unix_sk(other)->iolock); 2252 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2253 &err, 0); 2254 if (!newskb) 2255 goto err; 2256 } 2257 2258 /* we must acquire iolock as we modify already present 2259 * skbs in the sk_receive_queue and mess with skb->len 2260 */ 2261 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2262 if (err) { 2263 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2264 goto err; 2265 } 2266 2267 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2268 err = -EPIPE; 2269 send_sigpipe = true; 2270 goto err_unlock; 2271 } 2272 2273 unix_state_lock(other); 2274 2275 if (sock_flag(other, SOCK_DEAD) || 2276 other->sk_shutdown & RCV_SHUTDOWN) { 2277 err = -EPIPE; 2278 send_sigpipe = true; 2279 goto err_state_unlock; 2280 } 2281 2282 if (init_scm) { 2283 err = maybe_init_creds(&scm, socket, other); 2284 if (err) 2285 goto err_state_unlock; 2286 init_scm = false; 2287 } 2288 2289 skb = skb_peek_tail(&other->sk_receive_queue); 2290 if (tail && tail == skb) { 2291 skb = newskb; 2292 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2293 if (newskb) { 2294 skb = newskb; 2295 } else { 2296 tail = skb; 2297 goto alloc_skb; 2298 } 2299 } else if (newskb) { 2300 /* this is fast path, we don't necessarily need to 2301 * call to kfree_skb even though with newskb == NULL 2302 * this - does no harm 2303 */ 2304 consume_skb(newskb); 2305 newskb = NULL; 2306 } 2307 2308 if (skb_append_pagefrags(skb, page, offset, size)) { 2309 tail = skb; 2310 goto alloc_skb; 2311 } 2312 2313 skb->len += size; 2314 skb->data_len += size; 2315 skb->truesize += size; 2316 refcount_add(size, &sk->sk_wmem_alloc); 2317 2318 if (newskb) { 2319 err = unix_scm_to_skb(&scm, skb, false); 2320 if (err) 2321 goto err_state_unlock; 2322 spin_lock(&other->sk_receive_queue.lock); 2323 __skb_queue_tail(&other->sk_receive_queue, newskb); 2324 spin_unlock(&other->sk_receive_queue.lock); 2325 } 2326 2327 unix_state_unlock(other); 2328 mutex_unlock(&unix_sk(other)->iolock); 2329 2330 other->sk_data_ready(other); 2331 scm_destroy(&scm); 2332 return size; 2333 2334 err_state_unlock: 2335 unix_state_unlock(other); 2336 err_unlock: 2337 mutex_unlock(&unix_sk(other)->iolock); 2338 err: 2339 kfree_skb(newskb); 2340 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2341 send_sig(SIGPIPE, current, 0); 2342 if (!init_scm) 2343 scm_destroy(&scm); 2344 return err; 2345 } 2346 2347 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2348 size_t len) 2349 { 2350 int err; 2351 struct sock *sk = sock->sk; 2352 2353 err = sock_error(sk); 2354 if (err) 2355 return err; 2356 2357 if (sk->sk_state != TCP_ESTABLISHED) 2358 return -ENOTCONN; 2359 2360 if (msg->msg_namelen) 2361 msg->msg_namelen = 0; 2362 2363 return unix_dgram_sendmsg(sock, msg, len); 2364 } 2365 2366 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2367 size_t size, int flags) 2368 { 2369 struct sock *sk = sock->sk; 2370 2371 if (sk->sk_state != TCP_ESTABLISHED) 2372 return -ENOTCONN; 2373 2374 return unix_dgram_recvmsg(sock, msg, size, flags); 2375 } 2376 2377 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2378 { 2379 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2380 2381 if (addr) { 2382 msg->msg_namelen = addr->len; 2383 memcpy(msg->msg_name, addr->name, addr->len); 2384 } 2385 } 2386 2387 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2388 int flags) 2389 { 2390 struct scm_cookie scm; 2391 struct socket *sock = sk->sk_socket; 2392 struct unix_sock *u = unix_sk(sk); 2393 struct sk_buff *skb, *last; 2394 long timeo; 2395 int skip; 2396 int err; 2397 2398 err = -EOPNOTSUPP; 2399 if (flags&MSG_OOB) 2400 goto out; 2401 2402 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2403 2404 do { 2405 mutex_lock(&u->iolock); 2406 2407 skip = sk_peek_offset(sk, flags); 2408 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2409 &skip, 
&err, &last); 2410 if (skb) { 2411 if (!(flags & MSG_PEEK)) 2412 scm_stat_del(sk, skb); 2413 break; 2414 } 2415 2416 mutex_unlock(&u->iolock); 2417 2418 if (err != -EAGAIN) 2419 break; 2420 } while (timeo && 2421 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2422 &err, &timeo, last)); 2423 2424 if (!skb) { /* implies iolock unlocked */ 2425 unix_state_lock(sk); 2426 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2427 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2428 (sk->sk_shutdown & RCV_SHUTDOWN)) 2429 err = 0; 2430 unix_state_unlock(sk); 2431 goto out; 2432 } 2433 2434 if (wq_has_sleeper(&u->peer_wait)) 2435 wake_up_interruptible_sync_poll(&u->peer_wait, 2436 EPOLLOUT | EPOLLWRNORM | 2437 EPOLLWRBAND); 2438 2439 if (msg->msg_name) 2440 unix_copy_addr(msg, skb->sk); 2441 2442 if (size > skb->len - skip) 2443 size = skb->len - skip; 2444 else if (size < skb->len - skip) 2445 msg->msg_flags |= MSG_TRUNC; 2446 2447 err = skb_copy_datagram_msg(skb, skip, msg, size); 2448 if (err) 2449 goto out_free; 2450 2451 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2452 __sock_recv_timestamp(msg, sk, skb); 2453 2454 memset(&scm, 0, sizeof(scm)); 2455 2456 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2457 unix_set_secdata(&scm, skb); 2458 2459 if (!(flags & MSG_PEEK)) { 2460 if (UNIXCB(skb).fp) 2461 unix_detach_fds(&scm, skb); 2462 2463 sk_peek_offset_bwd(sk, skb->len); 2464 } else { 2465 /* It is questionable: on PEEK we could: 2466 - do not return fds - good, but too simple 8) 2467 - return fds, and do not return them on read (old strategy, 2468 apparently wrong) 2469 - clone fds (I chose it for now, it is the most universal 2470 solution) 2471 2472 POSIX 1003.1g does not actually define this clearly 2473 at all. POSIX 1003.1g doesn't define a lot of things 2474 clearly however! 2475 2476 */ 2477 2478 sk_peek_offset_fwd(sk, size); 2479 2480 if (UNIXCB(skb).fp) 2481 unix_peek_fds(&scm, skb); 2482 } 2483 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2484 2485 scm_recv(sock, msg, &scm, flags); 2486 2487 out_free: 2488 skb_free_datagram(sk, skb); 2489 mutex_unlock(&u->iolock); 2490 out: 2491 return err; 2492 } 2493 2494 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2495 int flags) 2496 { 2497 struct sock *sk = sock->sk; 2498 2499 #ifdef CONFIG_BPF_SYSCALL 2500 const struct proto *prot = READ_ONCE(sk->sk_prot); 2501 2502 if (prot != &unix_dgram_proto) 2503 return prot->recvmsg(sk, msg, size, flags, NULL); 2504 #endif 2505 return __unix_dgram_recvmsg(sk, msg, size, flags); 2506 } 2507 2508 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2509 { 2510 int copied = 0; 2511 2512 while (1) { 2513 struct unix_sock *u = unix_sk(sk); 2514 struct sk_buff *skb; 2515 int used, err; 2516 2517 mutex_lock(&u->iolock); 2518 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2519 mutex_unlock(&u->iolock); 2520 if (!skb) 2521 return err; 2522 2523 used = recv_actor(sk, skb); 2524 if (used <= 0) { 2525 if (!copied) 2526 copied = used; 2527 kfree_skb(skb); 2528 break; 2529 } else if (used <= skb->len) { 2530 copied += used; 2531 } 2532 2533 kfree_skb(skb); 2534 break; 2535 } 2536 2537 return copied; 2538 } 2539 2540 /* 2541 * Sleep until more data has arrived. But check for races.. 
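 *
 * unix_stream_data_wait() returns the remaining timeout.  The caller must
 * retake the iolock and re-examine the receive queue afterwards, since the
 * peer may have queued data, shut down or died while we slept.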
2542 */ 2543 static long unix_stream_data_wait(struct sock *sk, long timeo, 2544 struct sk_buff *last, unsigned int last_len, 2545 bool freezable) 2546 { 2547 struct sk_buff *tail; 2548 DEFINE_WAIT(wait); 2549 2550 unix_state_lock(sk); 2551 2552 for (;;) { 2553 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2554 2555 tail = skb_peek_tail(&sk->sk_receive_queue); 2556 if (tail != last || 2557 (tail && tail->len != last_len) || 2558 sk->sk_err || 2559 (sk->sk_shutdown & RCV_SHUTDOWN) || 2560 signal_pending(current) || 2561 !timeo) 2562 break; 2563 2564 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2565 unix_state_unlock(sk); 2566 if (freezable) 2567 timeo = freezable_schedule_timeout(timeo); 2568 else 2569 timeo = schedule_timeout(timeo); 2570 unix_state_lock(sk); 2571 2572 if (sock_flag(sk, SOCK_DEAD)) 2573 break; 2574 2575 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2576 } 2577 2578 finish_wait(sk_sleep(sk), &wait); 2579 unix_state_unlock(sk); 2580 return timeo; 2581 } 2582 2583 static unsigned int unix_skb_len(const struct sk_buff *skb) 2584 { 2585 return skb->len - UNIXCB(skb).consumed; 2586 } 2587 2588 struct unix_stream_read_state { 2589 int (*recv_actor)(struct sk_buff *, int, int, 2590 struct unix_stream_read_state *); 2591 struct socket *socket; 2592 struct msghdr *msg; 2593 struct pipe_inode_info *pipe; 2594 size_t size; 2595 int flags; 2596 unsigned int splice_flags; 2597 }; 2598 2599 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2600 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2601 { 2602 struct socket *sock = state->socket; 2603 struct sock *sk = sock->sk; 2604 struct unix_sock *u = unix_sk(sk); 2605 int chunk = 1; 2606 struct sk_buff *oob_skb; 2607 2608 mutex_lock(&u->iolock); 2609 unix_state_lock(sk); 2610 2611 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2612 unix_state_unlock(sk); 2613 mutex_unlock(&u->iolock); 2614 return -EINVAL; 2615 } 2616 2617 oob_skb = u->oob_skb; 2618 2619 if (!(state->flags & MSG_PEEK)) 2620 WRITE_ONCE(u->oob_skb, NULL); 2621 2622 unix_state_unlock(sk); 2623 2624 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2625 2626 if (!(state->flags & MSG_PEEK)) { 2627 UNIXCB(oob_skb).consumed += 1; 2628 kfree_skb(oob_skb); 2629 } 2630 2631 mutex_unlock(&u->iolock); 2632 2633 if (chunk < 0) 2634 return -EFAULT; 2635 2636 state->msg->msg_flags |= MSG_OOB; 2637 return 1; 2638 } 2639 2640 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2641 int flags, int copied) 2642 { 2643 struct unix_sock *u = unix_sk(sk); 2644 2645 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2646 skb_unlink(skb, &sk->sk_receive_queue); 2647 consume_skb(skb); 2648 skb = NULL; 2649 } else { 2650 if (skb == u->oob_skb) { 2651 if (copied) { 2652 skb = NULL; 2653 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2654 if (!(flags & MSG_PEEK)) { 2655 WRITE_ONCE(u->oob_skb, NULL); 2656 consume_skb(skb); 2657 } 2658 } else if (!(flags & MSG_PEEK)) { 2659 skb_unlink(skb, &sk->sk_receive_queue); 2660 consume_skb(skb); 2661 skb = skb_peek(&sk->sk_receive_queue); 2662 } 2663 } 2664 } 2665 return skb; 2666 } 2667 #endif 2668 2669 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2670 { 2671 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2672 return -ENOTCONN; 2673 2674 return unix_read_skb(sk, recv_actor); 2675 } 2676 2677 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2678 bool freezable) 2679 { 2680 struct scm_cookie scm; 2681 struct socket *sock = state->socket; 2682 struct sock *sk = sock->sk; 2683 
struct unix_sock *u = unix_sk(sk); 2684 int copied = 0; 2685 int flags = state->flags; 2686 int noblock = flags & MSG_DONTWAIT; 2687 bool check_creds = false; 2688 int target; 2689 int err = 0; 2690 long timeo; 2691 int skip; 2692 size_t size = state->size; 2693 unsigned int last_len; 2694 2695 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2696 err = -EINVAL; 2697 goto out; 2698 } 2699 2700 if (unlikely(flags & MSG_OOB)) { 2701 err = -EOPNOTSUPP; 2702 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2703 err = unix_stream_recv_urg(state); 2704 #endif 2705 goto out; 2706 } 2707 2708 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2709 timeo = sock_rcvtimeo(sk, noblock); 2710 2711 memset(&scm, 0, sizeof(scm)); 2712 2713 /* Lock the socket to prevent queue disordering 2714 * while sleeps in memcpy_tomsg 2715 */ 2716 mutex_lock(&u->iolock); 2717 2718 skip = max(sk_peek_offset(sk, flags), 0); 2719 2720 do { 2721 int chunk; 2722 bool drop_skb; 2723 struct sk_buff *skb, *last; 2724 2725 redo: 2726 unix_state_lock(sk); 2727 if (sock_flag(sk, SOCK_DEAD)) { 2728 err = -ECONNRESET; 2729 goto unlock; 2730 } 2731 last = skb = skb_peek(&sk->sk_receive_queue); 2732 last_len = last ? last->len : 0; 2733 2734 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2735 if (skb) { 2736 skb = manage_oob(skb, sk, flags, copied); 2737 if (!skb) { 2738 unix_state_unlock(sk); 2739 if (copied) 2740 break; 2741 goto redo; 2742 } 2743 } 2744 #endif 2745 again: 2746 if (skb == NULL) { 2747 if (copied >= target) 2748 goto unlock; 2749 2750 /* 2751 * POSIX 1003.1g mandates this order. 2752 */ 2753 2754 err = sock_error(sk); 2755 if (err) 2756 goto unlock; 2757 if (sk->sk_shutdown & RCV_SHUTDOWN) 2758 goto unlock; 2759 2760 unix_state_unlock(sk); 2761 if (!timeo) { 2762 err = -EAGAIN; 2763 break; 2764 } 2765 2766 mutex_unlock(&u->iolock); 2767 2768 timeo = unix_stream_data_wait(sk, timeo, last, 2769 last_len, freezable); 2770 2771 if (signal_pending(current)) { 2772 err = sock_intr_errno(timeo); 2773 scm_destroy(&scm); 2774 goto out; 2775 } 2776 2777 mutex_lock(&u->iolock); 2778 goto redo; 2779 unlock: 2780 unix_state_unlock(sk); 2781 break; 2782 } 2783 2784 while (skip >= unix_skb_len(skb)) { 2785 skip -= unix_skb_len(skb); 2786 last = skb; 2787 last_len = skb->len; 2788 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2789 if (!skb) 2790 goto again; 2791 } 2792 2793 unix_state_unlock(sk); 2794 2795 if (check_creds) { 2796 /* Never glue messages from different writers */ 2797 if (!unix_skb_scm_eq(skb, &scm)) 2798 break; 2799 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2800 /* Copy credentials */ 2801 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2802 unix_set_secdata(&scm, skb); 2803 check_creds = true; 2804 } 2805 2806 /* Copy address just once */ 2807 if (state->msg && state->msg->msg_name) { 2808 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2809 state->msg->msg_name); 2810 unix_copy_addr(state->msg, skb->sk); 2811 sunaddr = NULL; 2812 } 2813 2814 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2815 skb_get(skb); 2816 chunk = state->recv_actor(skb, skip, chunk, state); 2817 drop_skb = !unix_skb_len(skb); 2818 /* skb is only safe to use if !drop_skb */ 2819 consume_skb(skb); 2820 if (chunk < 0) { 2821 if (copied == 0) 2822 copied = -EFAULT; 2823 break; 2824 } 2825 copied += chunk; 2826 size -= chunk; 2827 2828 if (drop_skb) { 2829 /* the skb was touched by a concurrent reader; 2830 * we should not expect anything from this skb 2831 * anymore and assume it invalid - we can be 2832 * sure it was 
dropped from the socket queue 2833 * 2834 * let's report a short read 2835 */ 2836 err = 0; 2837 break; 2838 } 2839 2840 /* Mark read part of skb as used */ 2841 if (!(flags & MSG_PEEK)) { 2842 UNIXCB(skb).consumed += chunk; 2843 2844 sk_peek_offset_bwd(sk, chunk); 2845 2846 if (UNIXCB(skb).fp) { 2847 scm_stat_del(sk, skb); 2848 unix_detach_fds(&scm, skb); 2849 } 2850 2851 if (unix_skb_len(skb)) 2852 break; 2853 2854 skb_unlink(skb, &sk->sk_receive_queue); 2855 consume_skb(skb); 2856 2857 if (scm.fp) 2858 break; 2859 } else { 2860 /* It is questionable, see note in unix_dgram_recvmsg. 2861 */ 2862 if (UNIXCB(skb).fp) 2863 unix_peek_fds(&scm, skb); 2864 2865 sk_peek_offset_fwd(sk, chunk); 2866 2867 if (UNIXCB(skb).fp) 2868 break; 2869 2870 skip = 0; 2871 last = skb; 2872 last_len = skb->len; 2873 unix_state_lock(sk); 2874 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2875 if (skb) 2876 goto again; 2877 unix_state_unlock(sk); 2878 break; 2879 } 2880 } while (size); 2881 2882 mutex_unlock(&u->iolock); 2883 if (state->msg) 2884 scm_recv(sock, state->msg, &scm, flags); 2885 else 2886 scm_destroy(&scm); 2887 out: 2888 return copied ? : err; 2889 } 2890 2891 static int unix_stream_read_actor(struct sk_buff *skb, 2892 int skip, int chunk, 2893 struct unix_stream_read_state *state) 2894 { 2895 int ret; 2896 2897 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2898 state->msg, chunk); 2899 return ret ?: chunk; 2900 } 2901 2902 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2903 size_t size, int flags) 2904 { 2905 struct unix_stream_read_state state = { 2906 .recv_actor = unix_stream_read_actor, 2907 .socket = sk->sk_socket, 2908 .msg = msg, 2909 .size = size, 2910 .flags = flags 2911 }; 2912 2913 return unix_stream_read_generic(&state, true); 2914 } 2915 2916 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2917 size_t size, int flags) 2918 { 2919 struct unix_stream_read_state state = { 2920 .recv_actor = unix_stream_read_actor, 2921 .socket = sock, 2922 .msg = msg, 2923 .size = size, 2924 .flags = flags 2925 }; 2926 2927 #ifdef CONFIG_BPF_SYSCALL 2928 struct sock *sk = sock->sk; 2929 const struct proto *prot = READ_ONCE(sk->sk_prot); 2930 2931 if (prot != &unix_stream_proto) 2932 return prot->recvmsg(sk, msg, size, flags, NULL); 2933 #endif 2934 return unix_stream_read_generic(&state, true); 2935 } 2936 2937 static int unix_stream_splice_actor(struct sk_buff *skb, 2938 int skip, int chunk, 2939 struct unix_stream_read_state *state) 2940 { 2941 return skb_splice_bits(skb, state->socket->sk, 2942 UNIXCB(skb).consumed + skip, 2943 state->pipe, chunk, state->splice_flags); 2944 } 2945 2946 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2947 struct pipe_inode_info *pipe, 2948 size_t size, unsigned int flags) 2949 { 2950 struct unix_stream_read_state state = { 2951 .recv_actor = unix_stream_splice_actor, 2952 .socket = sock, 2953 .pipe = pipe, 2954 .size = size, 2955 .splice_flags = flags, 2956 }; 2957 2958 if (unlikely(*ppos)) 2959 return -ESPIPE; 2960 2961 if (sock->file->f_flags & O_NONBLOCK || 2962 flags & SPLICE_F_NONBLOCK) 2963 state.flags = MSG_DONTWAIT; 2964 2965 return unix_stream_read_generic(&state, false); 2966 } 2967 2968 static int unix_shutdown(struct socket *sock, int mode) 2969 { 2970 struct sock *sk = sock->sk; 2971 struct sock *other; 2972 2973 if (mode < SHUT_RD || mode > SHUT_RDWR) 2974 return -EINVAL; 2975 /* This maps: 2976 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2977 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 
2978 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2979 */ 2980 ++mode; 2981 2982 unix_state_lock(sk); 2983 sk->sk_shutdown |= mode; 2984 other = unix_peer(sk); 2985 if (other) 2986 sock_hold(other); 2987 unix_state_unlock(sk); 2988 sk->sk_state_change(sk); 2989 2990 if (other && 2991 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2992 2993 int peer_mode = 0; 2994 const struct proto *prot = READ_ONCE(other->sk_prot); 2995 2996 if (prot->unhash) 2997 prot->unhash(other); 2998 if (mode&RCV_SHUTDOWN) 2999 peer_mode |= SEND_SHUTDOWN; 3000 if (mode&SEND_SHUTDOWN) 3001 peer_mode |= RCV_SHUTDOWN; 3002 unix_state_lock(other); 3003 other->sk_shutdown |= peer_mode; 3004 unix_state_unlock(other); 3005 other->sk_state_change(other); 3006 if (peer_mode == SHUTDOWN_MASK) 3007 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 3008 else if (peer_mode & RCV_SHUTDOWN) 3009 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 3010 } 3011 if (other) 3012 sock_put(other); 3013 3014 return 0; 3015 } 3016 3017 long unix_inq_len(struct sock *sk) 3018 { 3019 struct sk_buff *skb; 3020 long amount = 0; 3021 3022 if (sk->sk_state == TCP_LISTEN) 3023 return -EINVAL; 3024 3025 spin_lock(&sk->sk_receive_queue.lock); 3026 if (sk->sk_type == SOCK_STREAM || 3027 sk->sk_type == SOCK_SEQPACKET) { 3028 skb_queue_walk(&sk->sk_receive_queue, skb) 3029 amount += unix_skb_len(skb); 3030 } else { 3031 skb = skb_peek(&sk->sk_receive_queue); 3032 if (skb) 3033 amount = skb->len; 3034 } 3035 spin_unlock(&sk->sk_receive_queue.lock); 3036 3037 return amount; 3038 } 3039 EXPORT_SYMBOL_GPL(unix_inq_len); 3040 3041 long unix_outq_len(struct sock *sk) 3042 { 3043 return sk_wmem_alloc_get(sk); 3044 } 3045 EXPORT_SYMBOL_GPL(unix_outq_len); 3046 3047 static int unix_open_file(struct sock *sk) 3048 { 3049 struct path path; 3050 struct file *f; 3051 int fd; 3052 3053 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3054 return -EPERM; 3055 3056 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3057 return -ENOENT; 3058 3059 path = unix_sk(sk)->path; 3060 if (!path.dentry) 3061 return -ENOENT; 3062 3063 path_get(&path); 3064 3065 fd = get_unused_fd_flags(O_CLOEXEC); 3066 if (fd < 0) 3067 goto out; 3068 3069 f = dentry_open(&path, O_PATH, current_cred()); 3070 if (IS_ERR(f)) { 3071 put_unused_fd(fd); 3072 fd = PTR_ERR(f); 3073 goto out; 3074 } 3075 3076 fd_install(fd, f); 3077 out: 3078 path_put(&path); 3079 3080 return fd; 3081 } 3082 3083 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3084 { 3085 struct sock *sk = sock->sk; 3086 long amount = 0; 3087 int err; 3088 3089 switch (cmd) { 3090 case SIOCOUTQ: 3091 amount = unix_outq_len(sk); 3092 err = put_user(amount, (int __user *)arg); 3093 break; 3094 case SIOCINQ: 3095 amount = unix_inq_len(sk); 3096 if (amount < 0) 3097 err = amount; 3098 else 3099 err = put_user(amount, (int __user *)arg); 3100 break; 3101 case SIOCUNIXFILE: 3102 err = unix_open_file(sk); 3103 break; 3104 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3105 case SIOCATMARK: 3106 { 3107 struct sk_buff *skb; 3108 int answ = 0; 3109 3110 skb = skb_peek(&sk->sk_receive_queue); 3111 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3112 answ = 1; 3113 err = put_user(answ, (int __user *)arg); 3114 } 3115 break; 3116 #endif 3117 default: 3118 err = -ENOIOCTLCMD; 3119 break; 3120 } 3121 return err; 3122 } 3123 3124 #ifdef CONFIG_COMPAT 3125 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3126 { 3127 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3128 } 3129 
#endif 3130 3131 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3132 { 3133 struct sock *sk = sock->sk; 3134 __poll_t mask; 3135 3136 sock_poll_wait(file, sock, wait); 3137 mask = 0; 3138 3139 /* exceptional events? */ 3140 if (sk->sk_err) 3141 mask |= EPOLLERR; 3142 if (sk->sk_shutdown == SHUTDOWN_MASK) 3143 mask |= EPOLLHUP; 3144 if (sk->sk_shutdown & RCV_SHUTDOWN) 3145 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3146 3147 /* readable? */ 3148 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3149 mask |= EPOLLIN | EPOLLRDNORM; 3150 if (sk_is_readable(sk)) 3151 mask |= EPOLLIN | EPOLLRDNORM; 3152 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3153 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3154 mask |= EPOLLPRI; 3155 #endif 3156 3157 /* Connection-based need to check for termination and startup */ 3158 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3159 sk->sk_state == TCP_CLOSE) 3160 mask |= EPOLLHUP; 3161 3162 /* 3163 * we set writable also when the other side has shut down the 3164 * connection. This prevents stuck sockets. 3165 */ 3166 if (unix_writable(sk)) 3167 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3168 3169 return mask; 3170 } 3171 3172 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3173 poll_table *wait) 3174 { 3175 struct sock *sk = sock->sk, *other; 3176 unsigned int writable; 3177 __poll_t mask; 3178 3179 sock_poll_wait(file, sock, wait); 3180 mask = 0; 3181 3182 /* exceptional events? */ 3183 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3184 mask |= EPOLLERR | 3185 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3186 3187 if (sk->sk_shutdown & RCV_SHUTDOWN) 3188 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3189 if (sk->sk_shutdown == SHUTDOWN_MASK) 3190 mask |= EPOLLHUP; 3191 3192 /* readable? */ 3193 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3194 mask |= EPOLLIN | EPOLLRDNORM; 3195 if (sk_is_readable(sk)) 3196 mask |= EPOLLIN | EPOLLRDNORM; 3197 3198 /* Connection-based need to check for termination and startup */ 3199 if (sk->sk_type == SOCK_SEQPACKET) { 3200 if (sk->sk_state == TCP_CLOSE) 3201 mask |= EPOLLHUP; 3202 /* connection hasn't started yet? */ 3203 if (sk->sk_state == TCP_SYN_SENT) 3204 return mask; 3205 } 3206 3207 /* No write status requested, avoid expensive OUT tests. 
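 *
 * Writability here also depends on the peer: when the peer's receive
 * queue is full we clear writable and register on the peer's wait queue
 * via unix_dgram_peer_wake_me(), so this poller is woken again once the
 * peer reads something.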
*/ 3208 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3209 return mask; 3210 3211 writable = unix_writable(sk); 3212 if (writable) { 3213 unix_state_lock(sk); 3214 3215 other = unix_peer(sk); 3216 if (other && unix_peer(other) != sk && 3217 unix_recvq_full_lockless(other) && 3218 unix_dgram_peer_wake_me(sk, other)) 3219 writable = 0; 3220 3221 unix_state_unlock(sk); 3222 } 3223 3224 if (writable) 3225 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3226 else 3227 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3228 3229 return mask; 3230 } 3231 3232 #ifdef CONFIG_PROC_FS 3233 3234 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3235 3236 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3237 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3238 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3239 3240 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3241 { 3242 unsigned long offset = get_offset(*pos); 3243 unsigned long bucket = get_bucket(*pos); 3244 unsigned long count = 0; 3245 struct sock *sk; 3246 3247 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3248 sk; sk = sk_next(sk)) { 3249 if (++count == offset) 3250 break; 3251 } 3252 3253 return sk; 3254 } 3255 3256 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3257 { 3258 unsigned long bucket = get_bucket(*pos); 3259 struct net *net = seq_file_net(seq); 3260 struct sock *sk; 3261 3262 while (bucket < UNIX_HASH_SIZE) { 3263 spin_lock(&net->unx.table.locks[bucket]); 3264 3265 sk = unix_from_bucket(seq, pos); 3266 if (sk) 3267 return sk; 3268 3269 spin_unlock(&net->unx.table.locks[bucket]); 3270 3271 *pos = set_bucket_offset(++bucket, 1); 3272 } 3273 3274 return NULL; 3275 } 3276 3277 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3278 loff_t *pos) 3279 { 3280 unsigned long bucket = get_bucket(*pos); 3281 3282 sk = sk_next(sk); 3283 if (sk) 3284 return sk; 3285 3286 3287 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3288 3289 *pos = set_bucket_offset(++bucket, 1); 3290 3291 return unix_get_first(seq, pos); 3292 } 3293 3294 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3295 { 3296 if (!*pos) 3297 return SEQ_START_TOKEN; 3298 3299 return unix_get_first(seq, pos); 3300 } 3301 3302 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3303 { 3304 ++*pos; 3305 3306 if (v == SEQ_START_TOKEN) 3307 return unix_get_first(seq, pos); 3308 3309 return unix_get_next(seq, v, pos); 3310 } 3311 3312 static void unix_seq_stop(struct seq_file *seq, void *v) 3313 { 3314 struct sock *sk = v; 3315 3316 if (sk) 3317 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3318 } 3319 3320 static int unix_seq_show(struct seq_file *seq, void *v) 3321 { 3322 3323 if (v == SEQ_START_TOKEN) 3324 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3325 "Inode Path\n"); 3326 else { 3327 struct sock *s = v; 3328 struct unix_sock *u = unix_sk(s); 3329 unix_state_lock(s); 3330 3331 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3332 s, 3333 refcount_read(&s->sk_refcnt), 3334 0, 3335 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3336 s->sk_type, 3337 s->sk_socket ? 3338 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3339 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3340 sock_i_ino(s)); 3341 3342 if (u->addr) { // under a hash table lock here 3343 int i, len; 3344 seq_putc(seq, ' '); 3345 3346 i = 0; 3347 len = u->addr->len - 3348 offsetof(struct sockaddr_un, sun_path); 3349 if (u->addr->name->sun_path[0]) { 3350 len--; 3351 } else { 3352 seq_putc(seq, '@'); 3353 i++; 3354 } 3355 for ( ; i < len; i++) 3356 seq_putc(seq, u->addr->name->sun_path[i] ?: 3357 '@'); 3358 } 3359 unix_state_unlock(s); 3360 seq_putc(seq, '\n'); 3361 } 3362 3363 return 0; 3364 } 3365 3366 static const struct seq_operations unix_seq_ops = { 3367 .start = unix_seq_start, 3368 .next = unix_seq_next, 3369 .stop = unix_seq_stop, 3370 .show = unix_seq_show, 3371 }; 3372 3373 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3374 struct bpf_unix_iter_state { 3375 struct seq_net_private p; 3376 unsigned int cur_sk; 3377 unsigned int end_sk; 3378 unsigned int max_sk; 3379 struct sock **batch; 3380 bool st_bucket_done; 3381 }; 3382 3383 struct bpf_iter__unix { 3384 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3385 __bpf_md_ptr(struct unix_sock *, unix_sk); 3386 uid_t uid __aligned(8); 3387 }; 3388 3389 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3390 struct unix_sock *unix_sk, uid_t uid) 3391 { 3392 struct bpf_iter__unix ctx; 3393 3394 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3395 ctx.meta = meta; 3396 ctx.unix_sk = unix_sk; 3397 ctx.uid = uid; 3398 return bpf_iter_run_prog(prog, &ctx); 3399 } 3400 3401 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3402 3403 { 3404 struct bpf_unix_iter_state *iter = seq->private; 3405 unsigned int expected = 1; 3406 struct sock *sk; 3407 3408 sock_hold(start_sk); 3409 iter->batch[iter->end_sk++] = start_sk; 3410 3411 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3412 if (iter->end_sk < iter->max_sk) { 3413 sock_hold(sk); 3414 iter->batch[iter->end_sk++] = sk; 3415 } 3416 3417 expected++; 3418 } 3419 3420 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3421 3422 return expected; 3423 } 3424 3425 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3426 { 3427 while (iter->cur_sk < iter->end_sk) 3428 sock_put(iter->batch[iter->cur_sk++]); 3429 } 3430 3431 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3432 unsigned int new_batch_sz) 3433 { 3434 struct sock **new_batch; 3435 3436 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3437 GFP_USER | __GFP_NOWARN); 3438 if (!new_batch) 3439 return -ENOMEM; 3440 3441 bpf_iter_unix_put_batch(iter); 3442 kvfree(iter->batch); 3443 iter->batch = new_batch; 3444 iter->max_sk = new_batch_sz; 3445 3446 return 0; 3447 } 3448 3449 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3450 loff_t *pos) 3451 { 3452 struct bpf_unix_iter_state *iter = seq->private; 3453 unsigned int expected; 3454 bool resized = false; 3455 struct sock *sk; 3456 3457 if (iter->st_bucket_done) 3458 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3459 3460 again: 3461 /* Get a new batch */ 3462 iter->cur_sk = 0; 3463 iter->end_sk = 0; 3464 3465 sk = unix_get_first(seq, pos); 3466 if (!sk) 3467 return NULL; /* Done */ 3468 3469 expected = bpf_iter_unix_hold_batch(seq, sk); 3470 3471 if (iter->end_sk == expected) { 3472 iter->st_bucket_done = true; 3473 return sk; 3474 } 3475 3476 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3477 resized = true; 3478 goto again; 3479 } 3480 3481 return sk; 3482 } 3483 3484 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3485 { 3486 if (!*pos) 3487 return SEQ_START_TOKEN; 3488 3489 /* bpf iter does not support lseek, so it always 3490 * continue from where it was stop()-ped. 3491 */ 3492 return bpf_iter_unix_batch(seq, pos); 3493 } 3494 3495 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3496 { 3497 struct bpf_unix_iter_state *iter = seq->private; 3498 struct sock *sk; 3499 3500 /* Whenever seq_next() is called, the iter->cur_sk is 3501 * done with seq_show(), so advance to the next sk in 3502 * the batch. 3503 */ 3504 if (iter->cur_sk < iter->end_sk) 3505 sock_put(iter->batch[iter->cur_sk++]); 3506 3507 ++*pos; 3508 3509 if (iter->cur_sk < iter->end_sk) 3510 sk = iter->batch[iter->cur_sk]; 3511 else 3512 sk = bpf_iter_unix_batch(seq, pos); 3513 3514 return sk; 3515 } 3516 3517 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3518 { 3519 struct bpf_iter_meta meta; 3520 struct bpf_prog *prog; 3521 struct sock *sk = v; 3522 uid_t uid; 3523 bool slow; 3524 int ret; 3525 3526 if (v == SEQ_START_TOKEN) 3527 return 0; 3528 3529 slow = lock_sock_fast(sk); 3530 3531 if (unlikely(sk_unhashed(sk))) { 3532 ret = SEQ_SKIP; 3533 goto unlock; 3534 } 3535 3536 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3537 meta.seq = seq; 3538 prog = bpf_iter_get_info(&meta, false); 3539 ret = unix_prog_seq_show(prog, &meta, v, uid); 3540 unlock: 3541 unlock_sock_fast(sk, slow); 3542 return ret; 3543 } 3544 3545 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3546 { 3547 struct bpf_unix_iter_state *iter = seq->private; 3548 struct bpf_iter_meta meta; 3549 struct bpf_prog *prog; 3550 3551 if (!v) { 3552 meta.seq = seq; 3553 prog = bpf_iter_get_info(&meta, true); 3554 if (prog) 3555 (void)unix_prog_seq_show(prog, &meta, v, 0); 3556 } 3557 3558 if (iter->cur_sk < iter->end_sk) 3559 bpf_iter_unix_put_batch(iter); 3560 } 3561 3562 static const struct seq_operations bpf_iter_unix_seq_ops = { 3563 .start = bpf_iter_unix_seq_start, 3564 .next = bpf_iter_unix_seq_next, 3565 .stop = bpf_iter_unix_seq_stop, 3566 .show = bpf_iter_unix_seq_show, 3567 }; 3568 #endif 3569 #endif 3570 3571 static const struct net_proto_family unix_family_ops = { 3572 .family = PF_UNIX, 3573 .create = unix_create, 3574 .owner = THIS_MODULE, 3575 }; 3576 3577 3578 static int __net_init unix_net_init(struct net *net) 3579 { 3580 int i; 3581 3582 net->unx.sysctl_max_dgram_qlen = 10; 3583 if (unix_sysctl_register(net)) 3584 goto out; 3585 3586 #ifdef CONFIG_PROC_FS 3587 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3588 sizeof(struct seq_net_private))) 3589 goto err_sysctl; 3590 #endif 3591 3592 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3593 sizeof(spinlock_t), GFP_KERNEL); 3594 if (!net->unx.table.locks) 3595 goto err_proc; 3596 3597 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3598 sizeof(struct hlist_head), 3599 GFP_KERNEL); 3600 if (!net->unx.table.buckets) 3601 goto free_locks; 3602 3603 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3604 spin_lock_init(&net->unx.table.locks[i]); 3605 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3606 } 3607 3608 return 0; 3609 3610 free_locks: 3611 kvfree(net->unx.table.locks); 3612 err_proc: 3613 #ifdef CONFIG_PROC_FS 3614 remove_proc_entry("unix", net->proc_net); 3615 err_sysctl: 3616 #endif 3617 unix_sysctl_unregister(net); 3618 out: 3619 return -ENOMEM; 3620 } 3621 3622 static void __net_exit unix_net_exit(struct net *net) 3623 { 3624 
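	/* Undo what unix_net_init() set up for this namespace. */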
kvfree(net->unx.table.buckets); 3625 kvfree(net->unx.table.locks); 3626 unix_sysctl_unregister(net); 3627 remove_proc_entry("unix", net->proc_net); 3628 } 3629 3630 static struct pernet_operations unix_net_ops = { 3631 .init = unix_net_init, 3632 .exit = unix_net_exit, 3633 }; 3634 3635 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3636 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3637 struct unix_sock *unix_sk, uid_t uid) 3638 3639 #define INIT_BATCH_SZ 16 3640 3641 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3642 { 3643 struct bpf_unix_iter_state *iter = priv_data; 3644 int err; 3645 3646 err = bpf_iter_init_seq_net(priv_data, aux); 3647 if (err) 3648 return err; 3649 3650 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3651 if (err) { 3652 bpf_iter_fini_seq_net(priv_data); 3653 return err; 3654 } 3655 3656 return 0; 3657 } 3658 3659 static void bpf_iter_fini_unix(void *priv_data) 3660 { 3661 struct bpf_unix_iter_state *iter = priv_data; 3662 3663 bpf_iter_fini_seq_net(priv_data); 3664 kvfree(iter->batch); 3665 } 3666 3667 static const struct bpf_iter_seq_info unix_seq_info = { 3668 .seq_ops = &bpf_iter_unix_seq_ops, 3669 .init_seq_private = bpf_iter_init_unix, 3670 .fini_seq_private = bpf_iter_fini_unix, 3671 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3672 }; 3673 3674 static const struct bpf_func_proto * 3675 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3676 const struct bpf_prog *prog) 3677 { 3678 switch (func_id) { 3679 case BPF_FUNC_setsockopt: 3680 return &bpf_sk_setsockopt_proto; 3681 case BPF_FUNC_getsockopt: 3682 return &bpf_sk_getsockopt_proto; 3683 default: 3684 return NULL; 3685 } 3686 } 3687 3688 static struct bpf_iter_reg unix_reg_info = { 3689 .target = "unix", 3690 .ctx_arg_info_size = 1, 3691 .ctx_arg_info = { 3692 { offsetof(struct bpf_iter__unix, unix_sk), 3693 PTR_TO_BTF_ID_OR_NULL }, 3694 }, 3695 .get_func_proto = bpf_iter_unix_get_func_proto, 3696 .seq_info = &unix_seq_info, 3697 }; 3698 3699 static void __init bpf_iter_register(void) 3700 { 3701 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3702 if (bpf_iter_reg_target(&unix_reg_info)) 3703 pr_warn("Warning: could not register bpf iterator unix\n"); 3704 } 3705 #endif 3706 3707 static int __init af_unix_init(void) 3708 { 3709 int i, rc = -1; 3710 3711 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3712 3713 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3714 spin_lock_init(&bsd_socket_locks[i]); 3715 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3716 } 3717 3718 rc = proto_register(&unix_dgram_proto, 1); 3719 if (rc != 0) { 3720 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3721 goto out; 3722 } 3723 3724 rc = proto_register(&unix_stream_proto, 1); 3725 if (rc != 0) { 3726 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3727 goto out; 3728 } 3729 3730 sock_register(&unix_family_ops); 3731 register_pernet_subsys(&unix_net_ops); 3732 unix_bpf_build_proto(); 3733 3734 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3735 bpf_iter_register(); 3736 #endif 3737 3738 out: 3739 return rc; 3740 } 3741 3742 static void __exit af_unix_exit(void) 3743 { 3744 sock_unregister(PF_UNIX); 3745 proto_unregister(&unix_dgram_proto); 3746 proto_unregister(&unix_stream_proto); 3747 unregister_pernet_subsys(&unix_net_ops); 3748 } 3749 3750 /* Earlier than device_initcall() so that other drivers invoking 3751 
request_module() don't end up in a loop when modprobe tries 3752 to use a UNIX socket. But later than subsys_initcall() because 3753 we depend on the infrastructure initialised there. */ 3754 fs_initcall(af_unix_init); 3755 module_exit(af_unix_exit); 3756 3757 MODULE_LICENSE("GPL"); 3758 MODULE_ALIAS_NETPROTO(PF_UNIX); 3759