// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks being hashed (this is for
 *					unix_gc() performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
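 *
 *		  For illustration only (placeholder name "example" and fd,
 *		  not taken from these notes), a userspace bind of such an
 *		  abstract name could look like:
 *
 *			struct sockaddr_un a = { .sun_family = AF_UNIX };
 *			memcpy(a.sun_path + 1, "example", 7);   (sun_path[0] stays 0)
 *			bind(fd, (struct sockaddr *)&a,
 *			     offsetof(struct sockaddr_un, sun_path) + 1 + 7);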
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline
bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 209 { 210 return true; 211 } 212 #endif /* CONFIG_SECURITY_NETWORK */ 213 214 #define unix_peer(sk) (unix_sk(sk)->peer) 215 216 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 217 { 218 return unix_peer(osk) == sk; 219 } 220 221 static inline int unix_may_send(struct sock *sk, struct sock *osk) 222 { 223 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 224 } 225 226 static inline int unix_recvq_full(const struct sock *sk) 227 { 228 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 229 } 230 231 static inline int unix_recvq_full_lockless(const struct sock *sk) 232 { 233 return skb_queue_len_lockless(&sk->sk_receive_queue) > 234 READ_ONCE(sk->sk_max_ack_backlog); 235 } 236 237 struct sock *unix_peer_get(struct sock *s) 238 { 239 struct sock *peer; 240 241 unix_state_lock(s); 242 peer = unix_peer(s); 243 if (peer) 244 sock_hold(peer); 245 unix_state_unlock(s); 246 return peer; 247 } 248 EXPORT_SYMBOL_GPL(unix_peer_get); 249 250 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 251 int addr_len) 252 { 253 struct unix_address *addr; 254 255 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 256 if (!addr) 257 return NULL; 258 259 refcount_set(&addr->refcnt, 1); 260 addr->len = addr_len; 261 memcpy(addr->name, sunaddr, addr_len); 262 263 return addr; 264 } 265 266 static inline void unix_release_addr(struct unix_address *addr) 267 { 268 if (refcount_dec_and_test(&addr->refcnt)) 269 kfree(addr); 270 } 271 272 /* 273 * Check unix socket name: 274 * - should be not zero length. 275 * - if started by not zero, should be NULL terminated (FS object) 276 * - if started by zero, it is abstract name. 277 */ 278 279 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 280 { 281 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 282 addr_len > sizeof(*sunaddr)) 283 return -EINVAL; 284 285 if (sunaddr->sun_family != AF_UNIX) 286 return -EINVAL; 287 288 return 0; 289 } 290 291 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 292 { 293 /* This may look like an off by one error but it is a bit more 294 * subtle. 108 is the longest valid AF_UNIX path for a binding. 295 * sun_path[108] doesn't as such exist. However in kernel space 296 * we are guaranteed that it is a valid memory location in our 297 * kernel address buffer because syscall functions always pass 298 * a pointer of struct sockaddr_storage which has a bigger buffer 299 * than 108. 
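	 *
	 * Concretely: sizeof(struct sockaddr_un) is 110 bytes (2 bytes of
	 * sun_family plus 108 bytes of sun_path), the kernel-side copy lives
	 * in a 128-byte struct sockaddr_storage, and unix_validate_addr()
	 * caps addr_len at sizeof(struct sockaddr_un). The store to
	 * ((char *)sunaddr)[addr_len] below therefore touches at most
	 * offset 110 of a 128-byte buffer.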
300 */ 301 ((char *)sunaddr)[addr_len] = 0; 302 } 303 304 static void __unix_remove_socket(struct sock *sk) 305 { 306 sk_del_node_init(sk); 307 } 308 309 static void __unix_insert_socket(struct net *net, struct sock *sk) 310 { 311 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 312 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 313 } 314 315 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 316 struct unix_address *addr, unsigned int hash) 317 { 318 __unix_remove_socket(sk); 319 smp_store_release(&unix_sk(sk)->addr, addr); 320 321 sk->sk_hash = hash; 322 __unix_insert_socket(net, sk); 323 } 324 325 static void unix_remove_socket(struct net *net, struct sock *sk) 326 { 327 spin_lock(&net->unx.table.locks[sk->sk_hash]); 328 __unix_remove_socket(sk); 329 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 330 } 331 332 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 333 { 334 spin_lock(&net->unx.table.locks[sk->sk_hash]); 335 __unix_insert_socket(net, sk); 336 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 337 } 338 339 static void unix_insert_bsd_socket(struct sock *sk) 340 { 341 spin_lock(&bsd_socket_locks[sk->sk_hash]); 342 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 343 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 344 } 345 346 static void unix_remove_bsd_socket(struct sock *sk) 347 { 348 if (!hlist_unhashed(&sk->sk_bind_node)) { 349 spin_lock(&bsd_socket_locks[sk->sk_hash]); 350 __sk_del_bind_node(sk); 351 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 352 353 sk_node_init(&sk->sk_bind_node); 354 } 355 } 356 357 static struct sock *__unix_find_socket_byname(struct net *net, 358 struct sockaddr_un *sunname, 359 int len, unsigned int hash) 360 { 361 struct sock *s; 362 363 sk_for_each(s, &net->unx.table.buckets[hash]) { 364 struct unix_sock *u = unix_sk(s); 365 366 if (u->addr->len == len && 367 !memcmp(u->addr->name, sunname, len)) 368 return s; 369 } 370 return NULL; 371 } 372 373 static inline struct sock *unix_find_socket_byname(struct net *net, 374 struct sockaddr_un *sunname, 375 int len, unsigned int hash) 376 { 377 struct sock *s; 378 379 spin_lock(&net->unx.table.locks[hash]); 380 s = __unix_find_socket_byname(net, sunname, len, hash); 381 if (s) 382 sock_hold(s); 383 spin_unlock(&net->unx.table.locks[hash]); 384 return s; 385 } 386 387 static struct sock *unix_find_socket_byinode(struct inode *i) 388 { 389 unsigned int hash = unix_bsd_hash(i); 390 struct sock *s; 391 392 spin_lock(&bsd_socket_locks[hash]); 393 sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 394 struct dentry *dentry = unix_sk(s)->path.dentry; 395 396 if (dentry && d_backing_inode(dentry) == i) { 397 sock_hold(s); 398 spin_unlock(&bsd_socket_locks[hash]); 399 return s; 400 } 401 } 402 spin_unlock(&bsd_socket_locks[hash]); 403 return NULL; 404 } 405 406 /* Support code for asymmetrically connected dgram sockets 407 * 408 * If a datagram socket is connected to a socket not itself connected 409 * to the first socket (eg, /dev/log), clients may only enqueue more 410 * messages if the present receive queue of the server socket is not 411 * "too large". This means there's a second writeability condition 412 * poll and sendmsg need to test. The dgram recv code will do a wake 413 * up on the peer_wait wait queue of a socket upon reception of a 414 * datagram which needs to be propagated to sleeping would-be writers 415 * since these might not have sent anything so far. 
This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake up
 * was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
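	 *
	 * Summing up the return value: 1 means the peer is alive but its
	 * receive queue is over the limit, so the caller should treat the
	 * socket as not writable and the wake-up registration made above
	 * stays armed; 0 means there is no need to wait (the queue has room,
	 * or the peer is dead and the next write will fail), in which case a
	 * registration made by this call is removed again.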
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif
	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

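		/* Also drop any dgram peer-wake registration this socket may
		 * still hold on the peer before giving up our reference.
		 */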
unix_dgram_peer_wake_disconnect(sk, skpair); 638 sock_put(skpair); /* It may now die */ 639 } 640 641 /* Try to flush out this socket. Throw out buffers at least */ 642 643 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 644 if (state == TCP_LISTEN) 645 unix_release_sock(skb->sk, 1); 646 /* passed fds are erased in the kfree_skb hook */ 647 UNIXCB(skb).consumed = skb->len; 648 kfree_skb(skb); 649 } 650 651 if (path.dentry) 652 path_put(&path); 653 654 sock_put(sk); 655 656 /* ---- Socket is dead now and most probably destroyed ---- */ 657 658 /* 659 * Fixme: BSD difference: In BSD all sockets connected to us get 660 * ECONNRESET and we die on the spot. In Linux we behave 661 * like files and pipes do and wait for the last 662 * dereference. 663 * 664 * Can't we simply set sock->err? 665 * 666 * What the above comment does talk about? --ANK(980817) 667 */ 668 669 if (unix_tot_inflight) 670 unix_gc(); /* Garbage collect fds */ 671 } 672 673 static void init_peercred(struct sock *sk) 674 { 675 const struct cred *old_cred; 676 struct pid *old_pid; 677 678 spin_lock(&sk->sk_peer_lock); 679 old_pid = sk->sk_peer_pid; 680 old_cred = sk->sk_peer_cred; 681 sk->sk_peer_pid = get_pid(task_tgid(current)); 682 sk->sk_peer_cred = get_current_cred(); 683 spin_unlock(&sk->sk_peer_lock); 684 685 put_pid(old_pid); 686 put_cred(old_cred); 687 } 688 689 static void copy_peercred(struct sock *sk, struct sock *peersk) 690 { 691 const struct cred *old_cred; 692 struct pid *old_pid; 693 694 if (sk < peersk) { 695 spin_lock(&sk->sk_peer_lock); 696 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 697 } else { 698 spin_lock(&peersk->sk_peer_lock); 699 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 700 } 701 old_pid = sk->sk_peer_pid; 702 old_cred = sk->sk_peer_cred; 703 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 704 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 705 706 spin_unlock(&sk->sk_peer_lock); 707 spin_unlock(&peersk->sk_peer_lock); 708 709 put_pid(old_pid); 710 put_cred(old_cred); 711 } 712 713 static int unix_listen(struct socket *sock, int backlog) 714 { 715 int err; 716 struct sock *sk = sock->sk; 717 struct unix_sock *u = unix_sk(sk); 718 719 err = -EOPNOTSUPP; 720 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 721 goto out; /* Only stream/seqpacket sockets accept */ 722 err = -EINVAL; 723 if (!u->addr) 724 goto out; /* No listens on an unbound socket */ 725 unix_state_lock(sk); 726 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 727 goto out_unlock; 728 if (backlog > sk->sk_max_ack_backlog) 729 wake_up_interruptible_all(&u->peer_wait); 730 sk->sk_max_ack_backlog = backlog; 731 sk->sk_state = TCP_LISTEN; 732 /* set credentials so connect can copy them */ 733 init_peercred(sk); 734 err = 0; 735 736 out_unlock: 737 unix_state_unlock(sk); 738 out: 739 return err; 740 } 741 742 static int unix_release(struct socket *); 743 static int unix_bind(struct socket *, struct sockaddr *, int); 744 static int unix_stream_connect(struct socket *, struct sockaddr *, 745 int addr_len, int flags); 746 static int unix_socketpair(struct socket *, struct socket *); 747 static int unix_accept(struct socket *, struct socket *, int, bool); 748 static int unix_getname(struct socket *, struct sockaddr *, int); 749 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 750 static __poll_t unix_dgram_poll(struct file *, struct socket *, 751 poll_table *); 752 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 753 #ifdef 
CONFIG_COMPAT 754 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 755 #endif 756 static int unix_shutdown(struct socket *, int); 757 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 758 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 759 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 760 size_t size, int flags); 761 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 762 struct pipe_inode_info *, size_t size, 763 unsigned int flags); 764 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 765 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 766 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 767 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 768 static int unix_dgram_connect(struct socket *, struct sockaddr *, 769 int, int); 770 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 771 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 772 int); 773 774 static int unix_set_peek_off(struct sock *sk, int val) 775 { 776 struct unix_sock *u = unix_sk(sk); 777 778 if (mutex_lock_interruptible(&u->iolock)) 779 return -EINTR; 780 781 sk->sk_peek_off = val; 782 mutex_unlock(&u->iolock); 783 784 return 0; 785 } 786 787 #ifdef CONFIG_PROC_FS 788 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 789 { 790 struct sock *sk = sock->sk; 791 struct unix_sock *u; 792 793 if (sk) { 794 u = unix_sk(sock->sk); 795 seq_printf(m, "scm_fds: %u\n", 796 atomic_read(&u->scm_stat.nr_fds)); 797 } 798 } 799 #else 800 #define unix_show_fdinfo NULL 801 #endif 802 803 static const struct proto_ops unix_stream_ops = { 804 .family = PF_UNIX, 805 .owner = THIS_MODULE, 806 .release = unix_release, 807 .bind = unix_bind, 808 .connect = unix_stream_connect, 809 .socketpair = unix_socketpair, 810 .accept = unix_accept, 811 .getname = unix_getname, 812 .poll = unix_poll, 813 .ioctl = unix_ioctl, 814 #ifdef CONFIG_COMPAT 815 .compat_ioctl = unix_compat_ioctl, 816 #endif 817 .listen = unix_listen, 818 .shutdown = unix_shutdown, 819 .sendmsg = unix_stream_sendmsg, 820 .recvmsg = unix_stream_recvmsg, 821 .read_skb = unix_stream_read_skb, 822 .mmap = sock_no_mmap, 823 .sendpage = unix_stream_sendpage, 824 .splice_read = unix_stream_splice_read, 825 .set_peek_off = unix_set_peek_off, 826 .show_fdinfo = unix_show_fdinfo, 827 }; 828 829 static const struct proto_ops unix_dgram_ops = { 830 .family = PF_UNIX, 831 .owner = THIS_MODULE, 832 .release = unix_release, 833 .bind = unix_bind, 834 .connect = unix_dgram_connect, 835 .socketpair = unix_socketpair, 836 .accept = sock_no_accept, 837 .getname = unix_getname, 838 .poll = unix_dgram_poll, 839 .ioctl = unix_ioctl, 840 #ifdef CONFIG_COMPAT 841 .compat_ioctl = unix_compat_ioctl, 842 #endif 843 .listen = sock_no_listen, 844 .shutdown = unix_shutdown, 845 .sendmsg = unix_dgram_sendmsg, 846 .read_skb = unix_read_skb, 847 .recvmsg = unix_dgram_recvmsg, 848 .mmap = sock_no_mmap, 849 .sendpage = sock_no_sendpage, 850 .set_peek_off = unix_set_peek_off, 851 .show_fdinfo = unix_show_fdinfo, 852 }; 853 854 static const struct proto_ops unix_seqpacket_ops = { 855 .family = PF_UNIX, 856 .owner = THIS_MODULE, 857 .release = unix_release, 858 .bind = unix_bind, 859 .connect = unix_stream_connect, 860 .socketpair = unix_socketpair, 861 .accept = unix_accept, 862 .getname = unix_getname, 
863 .poll = unix_dgram_poll, 864 .ioctl = unix_ioctl, 865 #ifdef CONFIG_COMPAT 866 .compat_ioctl = unix_compat_ioctl, 867 #endif 868 .listen = unix_listen, 869 .shutdown = unix_shutdown, 870 .sendmsg = unix_seqpacket_sendmsg, 871 .recvmsg = unix_seqpacket_recvmsg, 872 .mmap = sock_no_mmap, 873 .sendpage = sock_no_sendpage, 874 .set_peek_off = unix_set_peek_off, 875 .show_fdinfo = unix_show_fdinfo, 876 }; 877 878 static void unix_close(struct sock *sk, long timeout) 879 { 880 /* Nothing to do here, unix socket does not need a ->close(). 881 * This is merely for sockmap. 882 */ 883 } 884 885 static void unix_unhash(struct sock *sk) 886 { 887 /* Nothing to do here, unix socket does not need a ->unhash(). 888 * This is merely for sockmap. 889 */ 890 } 891 892 struct proto unix_dgram_proto = { 893 .name = "UNIX", 894 .owner = THIS_MODULE, 895 .obj_size = sizeof(struct unix_sock), 896 .close = unix_close, 897 #ifdef CONFIG_BPF_SYSCALL 898 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 899 #endif 900 }; 901 902 struct proto unix_stream_proto = { 903 .name = "UNIX-STREAM", 904 .owner = THIS_MODULE, 905 .obj_size = sizeof(struct unix_sock), 906 .close = unix_close, 907 .unhash = unix_unhash, 908 #ifdef CONFIG_BPF_SYSCALL 909 .psock_update_sk_prot = unix_stream_bpf_update_proto, 910 #endif 911 }; 912 913 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 914 { 915 struct unix_sock *u; 916 struct sock *sk; 917 int err; 918 919 atomic_long_inc(&unix_nr_socks); 920 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 921 err = -ENFILE; 922 goto err; 923 } 924 925 if (type == SOCK_STREAM) 926 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 927 else /*dgram and seqpacket */ 928 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 929 930 if (!sk) { 931 err = -ENOMEM; 932 goto err; 933 } 934 935 sock_init_data(sock, sk); 936 937 sk->sk_hash = unix_unbound_hash(sk); 938 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 939 sk->sk_write_space = unix_write_space; 940 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 941 sk->sk_destruct = unix_sock_destructor; 942 u = unix_sk(sk); 943 u->path.dentry = NULL; 944 u->path.mnt = NULL; 945 spin_lock_init(&u->lock); 946 atomic_long_set(&u->inflight, 0); 947 INIT_LIST_HEAD(&u->link); 948 mutex_init(&u->iolock); /* single task reading lock */ 949 mutex_init(&u->bindlock); /* single task binding lock */ 950 init_waitqueue_head(&u->peer_wait); 951 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 952 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 953 unix_insert_unbound_socket(net, sk); 954 955 sock_prot_inuse_add(net, sk->sk_prot, 1); 956 957 return sk; 958 959 err: 960 atomic_long_dec(&unix_nr_socks); 961 return ERR_PTR(err); 962 } 963 964 static int unix_create(struct net *net, struct socket *sock, int protocol, 965 int kern) 966 { 967 struct sock *sk; 968 969 if (protocol && protocol != PF_UNIX) 970 return -EPROTONOSUPPORT; 971 972 sock->state = SS_UNCONNECTED; 973 974 switch (sock->type) { 975 case SOCK_STREAM: 976 sock->ops = &unix_stream_ops; 977 break; 978 /* 979 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 980 * nothing uses it. 
981 */ 982 case SOCK_RAW: 983 sock->type = SOCK_DGRAM; 984 fallthrough; 985 case SOCK_DGRAM: 986 sock->ops = &unix_dgram_ops; 987 break; 988 case SOCK_SEQPACKET: 989 sock->ops = &unix_seqpacket_ops; 990 break; 991 default: 992 return -ESOCKTNOSUPPORT; 993 } 994 995 sk = unix_create1(net, sock, kern, sock->type); 996 if (IS_ERR(sk)) 997 return PTR_ERR(sk); 998 999 return 0; 1000 } 1001 1002 static int unix_release(struct socket *sock) 1003 { 1004 struct sock *sk = sock->sk; 1005 1006 if (!sk) 1007 return 0; 1008 1009 sk->sk_prot->close(sk, 0); 1010 unix_release_sock(sk, 0); 1011 sock->sk = NULL; 1012 1013 return 0; 1014 } 1015 1016 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1017 int type) 1018 { 1019 struct inode *inode; 1020 struct path path; 1021 struct sock *sk; 1022 int err; 1023 1024 unix_mkname_bsd(sunaddr, addr_len); 1025 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1026 if (err) 1027 goto fail; 1028 1029 err = path_permission(&path, MAY_WRITE); 1030 if (err) 1031 goto path_put; 1032 1033 err = -ECONNREFUSED; 1034 inode = d_backing_inode(path.dentry); 1035 if (!S_ISSOCK(inode->i_mode)) 1036 goto path_put; 1037 1038 sk = unix_find_socket_byinode(inode); 1039 if (!sk) 1040 goto path_put; 1041 1042 err = -EPROTOTYPE; 1043 if (sk->sk_type == type) 1044 touch_atime(&path); 1045 else 1046 goto sock_put; 1047 1048 path_put(&path); 1049 1050 return sk; 1051 1052 sock_put: 1053 sock_put(sk); 1054 path_put: 1055 path_put(&path); 1056 fail: 1057 return ERR_PTR(err); 1058 } 1059 1060 static struct sock *unix_find_abstract(struct net *net, 1061 struct sockaddr_un *sunaddr, 1062 int addr_len, int type) 1063 { 1064 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1065 struct dentry *dentry; 1066 struct sock *sk; 1067 1068 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1069 if (!sk) 1070 return ERR_PTR(-ECONNREFUSED); 1071 1072 dentry = unix_sk(sk)->path.dentry; 1073 if (dentry) 1074 touch_atime(&unix_sk(sk)->path); 1075 1076 return sk; 1077 } 1078 1079 static struct sock *unix_find_other(struct net *net, 1080 struct sockaddr_un *sunaddr, 1081 int addr_len, int type) 1082 { 1083 struct sock *sk; 1084 1085 if (sunaddr->sun_path[0]) 1086 sk = unix_find_bsd(sunaddr, addr_len, type); 1087 else 1088 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1089 1090 return sk; 1091 } 1092 1093 static int unix_autobind(struct sock *sk) 1094 { 1095 unsigned int new_hash, old_hash = sk->sk_hash; 1096 struct unix_sock *u = unix_sk(sk); 1097 struct net *net = sock_net(sk); 1098 struct unix_address *addr; 1099 u32 lastnum, ordernum; 1100 int err; 1101 1102 err = mutex_lock_interruptible(&u->bindlock); 1103 if (err) 1104 return err; 1105 1106 if (u->addr) 1107 goto out; 1108 1109 err = -ENOMEM; 1110 addr = kzalloc(sizeof(*addr) + 1111 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1112 if (!addr) 1113 goto out; 1114 1115 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1116 addr->name->sun_family = AF_UNIX; 1117 refcount_set(&addr->refcnt, 1); 1118 1119 ordernum = prandom_u32(); 1120 lastnum = ordernum & 0xFFFFF; 1121 retry: 1122 ordernum = (ordernum + 1) & 0xFFFFF; 1123 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1124 1125 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1126 unix_table_double_lock(net, old_hash, new_hash); 1127 1128 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1129 unix_table_double_unlock(net, old_hash, new_hash); 1130 1131 /* 
__unix_find_socket_byname() may take long time if many names 1132 * are already in use. 1133 */ 1134 cond_resched(); 1135 1136 if (ordernum == lastnum) { 1137 /* Give up if all names seems to be in use. */ 1138 err = -ENOSPC; 1139 unix_release_addr(addr); 1140 goto out; 1141 } 1142 1143 goto retry; 1144 } 1145 1146 __unix_set_addr_hash(net, sk, addr, new_hash); 1147 unix_table_double_unlock(net, old_hash, new_hash); 1148 err = 0; 1149 1150 out: mutex_unlock(&u->bindlock); 1151 return err; 1152 } 1153 1154 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1155 int addr_len) 1156 { 1157 umode_t mode = S_IFSOCK | 1158 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1159 unsigned int new_hash, old_hash = sk->sk_hash; 1160 struct unix_sock *u = unix_sk(sk); 1161 struct net *net = sock_net(sk); 1162 struct user_namespace *ns; // barf... 1163 struct unix_address *addr; 1164 struct dentry *dentry; 1165 struct path parent; 1166 int err; 1167 1168 unix_mkname_bsd(sunaddr, addr_len); 1169 addr_len = strlen(sunaddr->sun_path) + 1170 offsetof(struct sockaddr_un, sun_path) + 1; 1171 1172 addr = unix_create_addr(sunaddr, addr_len); 1173 if (!addr) 1174 return -ENOMEM; 1175 1176 /* 1177 * Get the parent directory, calculate the hash for last 1178 * component. 1179 */ 1180 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1181 if (IS_ERR(dentry)) { 1182 err = PTR_ERR(dentry); 1183 goto out; 1184 } 1185 1186 /* 1187 * All right, let's create it. 1188 */ 1189 ns = mnt_user_ns(parent.mnt); 1190 err = security_path_mknod(&parent, dentry, mode, 0); 1191 if (!err) 1192 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1193 if (err) 1194 goto out_path; 1195 err = mutex_lock_interruptible(&u->bindlock); 1196 if (err) 1197 goto out_unlink; 1198 if (u->addr) 1199 goto out_unlock; 1200 1201 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1202 unix_table_double_lock(net, old_hash, new_hash); 1203 u->path.mnt = mntget(parent.mnt); 1204 u->path.dentry = dget(dentry); 1205 __unix_set_addr_hash(net, sk, addr, new_hash); 1206 unix_table_double_unlock(net, old_hash, new_hash); 1207 unix_insert_bsd_socket(sk); 1208 mutex_unlock(&u->bindlock); 1209 done_path_create(&parent, dentry); 1210 return 0; 1211 1212 out_unlock: 1213 mutex_unlock(&u->bindlock); 1214 err = -EINVAL; 1215 out_unlink: 1216 /* failed after successful mknod? unlink what we'd created... */ 1217 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1218 out_path: 1219 done_path_create(&parent, dentry); 1220 out: 1221 unix_release_addr(addr); 1222 return err == -EEXIST ? 
-EADDRINUSE : err; 1223 } 1224 1225 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1226 int addr_len) 1227 { 1228 unsigned int new_hash, old_hash = sk->sk_hash; 1229 struct unix_sock *u = unix_sk(sk); 1230 struct net *net = sock_net(sk); 1231 struct unix_address *addr; 1232 int err; 1233 1234 addr = unix_create_addr(sunaddr, addr_len); 1235 if (!addr) 1236 return -ENOMEM; 1237 1238 err = mutex_lock_interruptible(&u->bindlock); 1239 if (err) 1240 goto out; 1241 1242 if (u->addr) { 1243 err = -EINVAL; 1244 goto out_mutex; 1245 } 1246 1247 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1248 unix_table_double_lock(net, old_hash, new_hash); 1249 1250 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1251 goto out_spin; 1252 1253 __unix_set_addr_hash(net, sk, addr, new_hash); 1254 unix_table_double_unlock(net, old_hash, new_hash); 1255 mutex_unlock(&u->bindlock); 1256 return 0; 1257 1258 out_spin: 1259 unix_table_double_unlock(net, old_hash, new_hash); 1260 err = -EADDRINUSE; 1261 out_mutex: 1262 mutex_unlock(&u->bindlock); 1263 out: 1264 unix_release_addr(addr); 1265 return err; 1266 } 1267 1268 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1269 { 1270 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1271 struct sock *sk = sock->sk; 1272 int err; 1273 1274 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1275 sunaddr->sun_family == AF_UNIX) 1276 return unix_autobind(sk); 1277 1278 err = unix_validate_addr(sunaddr, addr_len); 1279 if (err) 1280 return err; 1281 1282 if (sunaddr->sun_path[0]) 1283 err = unix_bind_bsd(sk, sunaddr, addr_len); 1284 else 1285 err = unix_bind_abstract(sk, sunaddr, addr_len); 1286 1287 return err; 1288 } 1289 1290 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1291 { 1292 if (unlikely(sk1 == sk2) || !sk2) { 1293 unix_state_lock(sk1); 1294 return; 1295 } 1296 if (sk1 < sk2) { 1297 unix_state_lock(sk1); 1298 unix_state_lock_nested(sk2); 1299 } else { 1300 unix_state_lock(sk2); 1301 unix_state_lock_nested(sk1); 1302 } 1303 } 1304 1305 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1306 { 1307 if (unlikely(sk1 == sk2) || !sk2) { 1308 unix_state_unlock(sk1); 1309 return; 1310 } 1311 unix_state_unlock(sk1); 1312 unix_state_unlock(sk2); 1313 } 1314 1315 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1316 int alen, int flags) 1317 { 1318 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1319 struct sock *sk = sock->sk; 1320 struct sock *other; 1321 int err; 1322 1323 err = -EINVAL; 1324 if (alen < offsetofend(struct sockaddr, sa_family)) 1325 goto out; 1326 1327 if (addr->sa_family != AF_UNSPEC) { 1328 err = unix_validate_addr(sunaddr, alen); 1329 if (err) 1330 goto out; 1331 1332 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1333 !unix_sk(sk)->addr) { 1334 err = unix_autobind(sk); 1335 if (err) 1336 goto out; 1337 } 1338 1339 restart: 1340 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1341 if (IS_ERR(other)) { 1342 err = PTR_ERR(other); 1343 goto out; 1344 } 1345 1346 unix_state_double_lock(sk, other); 1347 1348 /* Apparently VFS overslept socket death. Retry. 
*/ 1349 if (sock_flag(other, SOCK_DEAD)) { 1350 unix_state_double_unlock(sk, other); 1351 sock_put(other); 1352 goto restart; 1353 } 1354 1355 err = -EPERM; 1356 if (!unix_may_send(sk, other)) 1357 goto out_unlock; 1358 1359 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1360 if (err) 1361 goto out_unlock; 1362 1363 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1364 } else { 1365 /* 1366 * 1003.1g breaking connected state with AF_UNSPEC 1367 */ 1368 other = NULL; 1369 unix_state_double_lock(sk, other); 1370 } 1371 1372 /* 1373 * If it was connected, reconnect. 1374 */ 1375 if (unix_peer(sk)) { 1376 struct sock *old_peer = unix_peer(sk); 1377 1378 unix_peer(sk) = other; 1379 if (!other) 1380 sk->sk_state = TCP_CLOSE; 1381 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1382 1383 unix_state_double_unlock(sk, other); 1384 1385 if (other != old_peer) 1386 unix_dgram_disconnected(sk, old_peer); 1387 sock_put(old_peer); 1388 } else { 1389 unix_peer(sk) = other; 1390 unix_state_double_unlock(sk, other); 1391 } 1392 1393 return 0; 1394 1395 out_unlock: 1396 unix_state_double_unlock(sk, other); 1397 sock_put(other); 1398 out: 1399 return err; 1400 } 1401 1402 static long unix_wait_for_peer(struct sock *other, long timeo) 1403 __releases(&unix_sk(other)->lock) 1404 { 1405 struct unix_sock *u = unix_sk(other); 1406 int sched; 1407 DEFINE_WAIT(wait); 1408 1409 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1410 1411 sched = !sock_flag(other, SOCK_DEAD) && 1412 !(other->sk_shutdown & RCV_SHUTDOWN) && 1413 unix_recvq_full(other); 1414 1415 unix_state_unlock(other); 1416 1417 if (sched) 1418 timeo = schedule_timeout(timeo); 1419 1420 finish_wait(&u->peer_wait, &wait); 1421 return timeo; 1422 } 1423 1424 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1425 int addr_len, int flags) 1426 { 1427 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1428 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1429 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1430 struct net *net = sock_net(sk); 1431 struct sk_buff *skb = NULL; 1432 long timeo; 1433 int err; 1434 int st; 1435 1436 err = unix_validate_addr(sunaddr, addr_len); 1437 if (err) 1438 goto out; 1439 1440 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1441 err = unix_autobind(sk); 1442 if (err) 1443 goto out; 1444 } 1445 1446 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1447 1448 /* First of all allocate resources. 1449 If we will make it after state is locked, 1450 we will have to recheck all again in any case. 1451 */ 1452 1453 /* create new sock for complete connection */ 1454 newsk = unix_create1(net, NULL, 0, sock->type); 1455 if (IS_ERR(newsk)) { 1456 err = PTR_ERR(newsk); 1457 newsk = NULL; 1458 goto out; 1459 } 1460 1461 err = -ENOMEM; 1462 1463 /* Allocate skb for sending to listening sock */ 1464 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1465 if (skb == NULL) 1466 goto out; 1467 1468 restart: 1469 /* Find listening sock. */ 1470 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1471 if (IS_ERR(other)) { 1472 err = PTR_ERR(other); 1473 other = NULL; 1474 goto out; 1475 } 1476 1477 /* Latch state of peer */ 1478 unix_state_lock(other); 1479 1480 /* Apparently VFS overslept socket death. Retry. 
 */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.
	 *
	 * This is a tricky place. We need to grab our state lock and cannot
	 * drop the lock on the peer. It is dangerous because a deadlock is
	 * possible. The connect-to-self case and simultaneous
	 * attempts to connect are eliminated by checking the socket
	 * state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	 * check this before the attempt to grab the lock.
	 *
	 * Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock. Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
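	 *
	 * The matching acquire side is visible in unix_getname(), which does
	 * addr = smp_load_acquire(&unix_sk(sk)->addr) and only dereferences
	 * addr->len / addr->name afterwards, pairing with the release store
	 * below.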
1575 */ 1576 if (otheru->path.dentry) { 1577 path_get(&otheru->path); 1578 newu->path = otheru->path; 1579 } 1580 refcount_inc(&otheru->addr->refcnt); 1581 smp_store_release(&newu->addr, otheru->addr); 1582 1583 /* Set credentials */ 1584 copy_peercred(sk, other); 1585 1586 sock->state = SS_CONNECTED; 1587 sk->sk_state = TCP_ESTABLISHED; 1588 sock_hold(newsk); 1589 1590 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1591 unix_peer(sk) = newsk; 1592 1593 unix_state_unlock(sk); 1594 1595 /* take ten and send info to listening sock */ 1596 spin_lock(&other->sk_receive_queue.lock); 1597 __skb_queue_tail(&other->sk_receive_queue, skb); 1598 spin_unlock(&other->sk_receive_queue.lock); 1599 unix_state_unlock(other); 1600 other->sk_data_ready(other); 1601 sock_put(other); 1602 return 0; 1603 1604 out_unlock: 1605 if (other) 1606 unix_state_unlock(other); 1607 1608 out: 1609 kfree_skb(skb); 1610 if (newsk) 1611 unix_release_sock(newsk, 0); 1612 if (other) 1613 sock_put(other); 1614 return err; 1615 } 1616 1617 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1618 { 1619 struct sock *ska = socka->sk, *skb = sockb->sk; 1620 1621 /* Join our sockets back to back */ 1622 sock_hold(ska); 1623 sock_hold(skb); 1624 unix_peer(ska) = skb; 1625 unix_peer(skb) = ska; 1626 init_peercred(ska); 1627 init_peercred(skb); 1628 1629 ska->sk_state = TCP_ESTABLISHED; 1630 skb->sk_state = TCP_ESTABLISHED; 1631 socka->state = SS_CONNECTED; 1632 sockb->state = SS_CONNECTED; 1633 return 0; 1634 } 1635 1636 static void unix_sock_inherit_flags(const struct socket *old, 1637 struct socket *new) 1638 { 1639 if (test_bit(SOCK_PASSCRED, &old->flags)) 1640 set_bit(SOCK_PASSCRED, &new->flags); 1641 if (test_bit(SOCK_PASSSEC, &old->flags)) 1642 set_bit(SOCK_PASSSEC, &new->flags); 1643 } 1644 1645 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1646 bool kern) 1647 { 1648 struct sock *sk = sock->sk; 1649 struct sock *tsk; 1650 struct sk_buff *skb; 1651 int err; 1652 1653 err = -EOPNOTSUPP; 1654 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1655 goto out; 1656 1657 err = -EINVAL; 1658 if (sk->sk_state != TCP_LISTEN) 1659 goto out; 1660 1661 /* If socket state is TCP_LISTEN it cannot change (for now...), 1662 * so that no locks are necessary. 1663 */ 1664 1665 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1666 &err); 1667 if (!skb) { 1668 /* This means receive shutdown. 
*/ 1669 if (err == 0) 1670 err = -EINVAL; 1671 goto out; 1672 } 1673 1674 tsk = skb->sk; 1675 skb_free_datagram(sk, skb); 1676 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1677 1678 /* attach accepted sock to socket */ 1679 unix_state_lock(tsk); 1680 newsock->state = SS_CONNECTED; 1681 unix_sock_inherit_flags(sock, newsock); 1682 sock_graft(tsk, newsock); 1683 unix_state_unlock(tsk); 1684 return 0; 1685 1686 out: 1687 return err; 1688 } 1689 1690 1691 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1692 { 1693 struct sock *sk = sock->sk; 1694 struct unix_address *addr; 1695 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1696 int err = 0; 1697 1698 if (peer) { 1699 sk = unix_peer_get(sk); 1700 1701 err = -ENOTCONN; 1702 if (!sk) 1703 goto out; 1704 err = 0; 1705 } else { 1706 sock_hold(sk); 1707 } 1708 1709 addr = smp_load_acquire(&unix_sk(sk)->addr); 1710 if (!addr) { 1711 sunaddr->sun_family = AF_UNIX; 1712 sunaddr->sun_path[0] = 0; 1713 err = offsetof(struct sockaddr_un, sun_path); 1714 } else { 1715 err = addr->len; 1716 memcpy(sunaddr, addr->name, addr->len); 1717 } 1718 sock_put(sk); 1719 out: 1720 return err; 1721 } 1722 1723 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1724 { 1725 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1726 1727 /* 1728 * Garbage collection of unix sockets starts by selecting a set of 1729 * candidate sockets which have reference only from being in flight 1730 * (total_refs == inflight_refs). This condition is checked once during 1731 * the candidate collection phase, and candidates are marked as such, so 1732 * that non-candidates can later be ignored. While inflight_refs is 1733 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1734 * is an instantaneous decision. 1735 * 1736 * Once a candidate, however, the socket must not be reinstalled into a 1737 * file descriptor while the garbage collection is in progress. 1738 * 1739 * If the above conditions are met, then the directed graph of 1740 * candidates (*) does not change while unix_gc_lock is held. 1741 * 1742 * Any operations that changes the file count through file descriptors 1743 * (dup, close, sendmsg) does not change the graph since candidates are 1744 * not installed in fds. 1745 * 1746 * Dequeing a candidate via recvmsg would install it into an fd, but 1747 * that takes unix_gc_lock to decrement the inflight count, so it's 1748 * serialized with garbage collection. 1749 * 1750 * MSG_PEEK is special in that it does not change the inflight count, 1751 * yet does install the socket into an fd. The following lock/unlock 1752 * pair is to ensure serialization with garbage collection. It must be 1753 * done between incrementing the file count and installing the file into 1754 * an fd. 1755 * 1756 * If garbage collection starts after the barrier provided by the 1757 * lock/unlock, then it will see the elevated refcount and not mark this 1758 * as a candidate. If a garbage collection is already in progress 1759 * before the file count was incremented, then the lock/unlock pair will 1760 * ensure that garbage collection is finished before progressing to 1761 * installing the fd. 1762 * 1763 * (*) A -> B where B is on the queue of A or B is on the queue of C 1764 * which is on the queue of listening socket A. 
1765 */ 1766 spin_lock(&unix_gc_lock); 1767 spin_unlock(&unix_gc_lock); 1768 } 1769 1770 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1771 { 1772 int err = 0; 1773 1774 UNIXCB(skb).pid = get_pid(scm->pid); 1775 UNIXCB(skb).uid = scm->creds.uid; 1776 UNIXCB(skb).gid = scm->creds.gid; 1777 UNIXCB(skb).fp = NULL; 1778 unix_get_secdata(scm, skb); 1779 if (scm->fp && send_fds) 1780 err = unix_attach_fds(scm, skb); 1781 1782 skb->destructor = unix_destruct_scm; 1783 return err; 1784 } 1785 1786 static bool unix_passcred_enabled(const struct socket *sock, 1787 const struct sock *other) 1788 { 1789 return test_bit(SOCK_PASSCRED, &sock->flags) || 1790 !other->sk_socket || 1791 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1792 } 1793 1794 /* 1795 * Some apps rely on write() giving SCM_CREDENTIALS 1796 * We include credentials if source or destination socket 1797 * asserted SOCK_PASSCRED. 1798 */ 1799 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1800 const struct sock *other) 1801 { 1802 if (UNIXCB(skb).pid) 1803 return; 1804 if (unix_passcred_enabled(sock, other)) { 1805 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1806 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1807 } 1808 } 1809 1810 static int maybe_init_creds(struct scm_cookie *scm, 1811 struct socket *socket, 1812 const struct sock *other) 1813 { 1814 int err; 1815 struct msghdr msg = { .msg_controllen = 0 }; 1816 1817 err = scm_send(socket, &msg, scm, false); 1818 if (err) 1819 return err; 1820 1821 if (unix_passcred_enabled(socket, other)) { 1822 scm->pid = get_pid(task_tgid(current)); 1823 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1824 } 1825 return err; 1826 } 1827 1828 static bool unix_skb_scm_eq(struct sk_buff *skb, 1829 struct scm_cookie *scm) 1830 { 1831 return UNIXCB(skb).pid == scm->pid && 1832 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1833 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1834 unix_secdata_eq(scm, skb); 1835 } 1836 1837 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1838 { 1839 struct scm_fp_list *fp = UNIXCB(skb).fp; 1840 struct unix_sock *u = unix_sk(sk); 1841 1842 if (unlikely(fp && fp->count)) 1843 atomic_add(fp->count, &u->scm_stat.nr_fds); 1844 } 1845 1846 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1847 { 1848 struct scm_fp_list *fp = UNIXCB(skb).fp; 1849 struct unix_sock *u = unix_sk(sk); 1850 1851 if (unlikely(fp && fp->count)) 1852 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1853 } 1854 1855 /* 1856 * Send AF_UNIX data. 
1857 */ 1858 1859 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1860 size_t len) 1861 { 1862 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1863 struct sock *sk = sock->sk, *other = NULL; 1864 struct unix_sock *u = unix_sk(sk); 1865 struct scm_cookie scm; 1866 struct sk_buff *skb; 1867 int data_len = 0; 1868 int sk_locked; 1869 long timeo; 1870 int err; 1871 1872 wait_for_unix_gc(); 1873 err = scm_send(sock, msg, &scm, false); 1874 if (err < 0) 1875 return err; 1876 1877 err = -EOPNOTSUPP; 1878 if (msg->msg_flags&MSG_OOB) 1879 goto out; 1880 1881 if (msg->msg_namelen) { 1882 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1883 if (err) 1884 goto out; 1885 } else { 1886 sunaddr = NULL; 1887 err = -ENOTCONN; 1888 other = unix_peer_get(sk); 1889 if (!other) 1890 goto out; 1891 } 1892 1893 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1894 err = unix_autobind(sk); 1895 if (err) 1896 goto out; 1897 } 1898 1899 err = -EMSGSIZE; 1900 if (len > sk->sk_sndbuf - 32) 1901 goto out; 1902 1903 if (len > SKB_MAX_ALLOC) { 1904 data_len = min_t(size_t, 1905 len - SKB_MAX_ALLOC, 1906 MAX_SKB_FRAGS * PAGE_SIZE); 1907 data_len = PAGE_ALIGN(data_len); 1908 1909 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1910 } 1911 1912 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1913 msg->msg_flags & MSG_DONTWAIT, &err, 1914 PAGE_ALLOC_COSTLY_ORDER); 1915 if (skb == NULL) 1916 goto out; 1917 1918 err = unix_scm_to_skb(&scm, skb, true); 1919 if (err < 0) 1920 goto out_free; 1921 1922 skb_put(skb, len - data_len); 1923 skb->data_len = data_len; 1924 skb->len = len; 1925 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1926 if (err) 1927 goto out_free; 1928 1929 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1930 1931 restart: 1932 if (!other) { 1933 err = -ECONNRESET; 1934 if (sunaddr == NULL) 1935 goto out_free; 1936 1937 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1938 sk->sk_type); 1939 if (IS_ERR(other)) { 1940 err = PTR_ERR(other); 1941 other = NULL; 1942 goto out_free; 1943 } 1944 } 1945 1946 if (sk_filter(other, skb) < 0) { 1947 /* Toss the packet but do not return any error to the sender */ 1948 err = len; 1949 goto out_free; 1950 } 1951 1952 sk_locked = 0; 1953 unix_state_lock(other); 1954 restart_locked: 1955 err = -EPERM; 1956 if (!unix_may_send(sk, other)) 1957 goto out_unlock; 1958 1959 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1960 /* 1961 * Check with 1003.1g - what should 1962 * datagram error 1963 */ 1964 unix_state_unlock(other); 1965 sock_put(other); 1966 1967 if (!sk_locked) 1968 unix_state_lock(sk); 1969 1970 err = 0; 1971 if (unix_peer(sk) == other) { 1972 unix_peer(sk) = NULL; 1973 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 1974 1975 unix_state_unlock(sk); 1976 1977 sk->sk_state = TCP_CLOSE; 1978 unix_dgram_disconnected(sk, other); 1979 sock_put(other); 1980 err = -ECONNREFUSED; 1981 } else { 1982 unix_state_unlock(sk); 1983 } 1984 1985 other = NULL; 1986 if (err) 1987 goto out_free; 1988 goto restart; 1989 } 1990 1991 err = -EPIPE; 1992 if (other->sk_shutdown & RCV_SHUTDOWN) 1993 goto out_unlock; 1994 1995 if (sk->sk_type != SOCK_SEQPACKET) { 1996 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1997 if (err) 1998 goto out_unlock; 1999 } 2000 2001 /* other == sk && unix_peer(other) != sk if 2002 * - unix_peer(sk) == NULL, destination address bound to sk 2003 * - unix_peer(sk) == sk by time of get but disconnected before lock 2004 */ 2005 if (other != sk && 2006 
unlikely(unix_peer(other) != sk && 2007 unix_recvq_full_lockless(other))) { 2008 if (timeo) { 2009 timeo = unix_wait_for_peer(other, timeo); 2010 2011 err = sock_intr_errno(timeo); 2012 if (signal_pending(current)) 2013 goto out_free; 2014 2015 goto restart; 2016 } 2017 2018 if (!sk_locked) { 2019 unix_state_unlock(other); 2020 unix_state_double_lock(sk, other); 2021 } 2022 2023 if (unix_peer(sk) != other || 2024 unix_dgram_peer_wake_me(sk, other)) { 2025 err = -EAGAIN; 2026 sk_locked = 1; 2027 goto out_unlock; 2028 } 2029 2030 if (!sk_locked) { 2031 sk_locked = 1; 2032 goto restart_locked; 2033 } 2034 } 2035 2036 if (unlikely(sk_locked)) 2037 unix_state_unlock(sk); 2038 2039 if (sock_flag(other, SOCK_RCVTSTAMP)) 2040 __net_timestamp(skb); 2041 maybe_add_creds(skb, sock, other); 2042 scm_stat_add(other, skb); 2043 skb_queue_tail(&other->sk_receive_queue, skb); 2044 unix_state_unlock(other); 2045 other->sk_data_ready(other); 2046 sock_put(other); 2047 scm_destroy(&scm); 2048 return len; 2049 2050 out_unlock: 2051 if (sk_locked) 2052 unix_state_unlock(sk); 2053 unix_state_unlock(other); 2054 out_free: 2055 kfree_skb(skb); 2056 out: 2057 if (other) 2058 sock_put(other); 2059 scm_destroy(&scm); 2060 return err; 2061 } 2062 2063 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2064 * bytes, and a minimum of a full page. 2065 */ 2066 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2067 2068 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2069 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2070 { 2071 struct unix_sock *ousk = unix_sk(other); 2072 struct sk_buff *skb; 2073 int err = 0; 2074 2075 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2076 2077 if (!skb) 2078 return err; 2079 2080 skb_put(skb, 1); 2081 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2082 2083 if (err) { 2084 kfree_skb(skb); 2085 return err; 2086 } 2087 2088 unix_state_lock(other); 2089 2090 if (sock_flag(other, SOCK_DEAD) || 2091 (other->sk_shutdown & RCV_SHUTDOWN)) { 2092 unix_state_unlock(other); 2093 kfree_skb(skb); 2094 return -EPIPE; 2095 } 2096 2097 maybe_add_creds(skb, sock, other); 2098 skb_get(skb); 2099 2100 if (ousk->oob_skb) 2101 consume_skb(ousk->oob_skb); 2102 2103 WRITE_ONCE(ousk->oob_skb, skb); 2104 2105 scm_stat_add(other, skb); 2106 skb_queue_tail(&other->sk_receive_queue, skb); 2107 sk_send_sigurg(other); 2108 unix_state_unlock(other); 2109 other->sk_data_ready(other); 2110 2111 return err; 2112 } 2113 #endif 2114 2115 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2116 size_t len) 2117 { 2118 struct sock *sk = sock->sk; 2119 struct sock *other = NULL; 2120 int err, size; 2121 struct sk_buff *skb; 2122 int sent = 0; 2123 struct scm_cookie scm; 2124 bool fds_sent = false; 2125 int data_len; 2126 2127 wait_for_unix_gc(); 2128 err = scm_send(sock, msg, &scm, false); 2129 if (err < 0) 2130 return err; 2131 2132 err = -EOPNOTSUPP; 2133 if (msg->msg_flags & MSG_OOB) { 2134 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2135 if (len) 2136 len--; 2137 else 2138 #endif 2139 goto out_err; 2140 } 2141 2142 if (msg->msg_namelen) { 2143 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2144 goto out_err; 2145 } else { 2146 err = -ENOTCONN; 2147 other = unix_peer(sk); 2148 if (!other) 2149 goto out_err; 2150 } 2151 2152 if (sk->sk_shutdown & SEND_SHUTDOWN) 2153 goto pipe_err; 2154 2155 while (sent < len) { 2156 size = len - sent; 2157 2158 /* Keep two messages in the pipe so it schedules better */ 2159 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2160 2161 /* allow fallback to order-0 allocations */ 2162 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2163 2164 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2165 2166 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2167 2168 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2169 msg->msg_flags & MSG_DONTWAIT, &err, 2170 get_order(UNIX_SKB_FRAGS_SZ)); 2171 if (!skb) 2172 goto out_err; 2173 2174 /* Only send the fds in the first buffer */ 2175 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2176 if (err < 0) { 2177 kfree_skb(skb); 2178 goto out_err; 2179 } 2180 fds_sent = true; 2181 2182 skb_put(skb, size - data_len); 2183 skb->data_len = data_len; 2184 skb->len = size; 2185 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2186 if (err) { 2187 kfree_skb(skb); 2188 goto out_err; 2189 } 2190 2191 unix_state_lock(other); 2192 2193 if (sock_flag(other, SOCK_DEAD) || 2194 (other->sk_shutdown & RCV_SHUTDOWN)) 2195 goto pipe_err_free; 2196 2197 maybe_add_creds(skb, sock, other); 2198 scm_stat_add(other, skb); 2199 skb_queue_tail(&other->sk_receive_queue, skb); 2200 unix_state_unlock(other); 2201 other->sk_data_ready(other); 2202 sent += size; 2203 } 2204 2205 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2206 if (msg->msg_flags & MSG_OOB) { 2207 err = queue_oob(sock, msg, other); 2208 if (err) 2209 goto out_err; 2210 sent++; 2211 } 2212 #endif 2213 2214 scm_destroy(&scm); 2215 2216 return sent; 2217 2218 pipe_err_free: 2219 unix_state_unlock(other); 2220 kfree_skb(skb); 2221 pipe_err: 2222 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2223 send_sig(SIGPIPE, current, 0); 2224 err = -EPIPE; 2225 out_err: 2226 scm_destroy(&scm); 2227 return sent ? : err; 2228 } 2229 2230 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2231 int offset, size_t size, int flags) 2232 { 2233 int err; 2234 bool send_sigpipe = false; 2235 bool init_scm = true; 2236 struct scm_cookie scm; 2237 struct sock *other, *sk = socket->sk; 2238 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2239 2240 if (flags & MSG_OOB) 2241 return -EOPNOTSUPP; 2242 2243 other = unix_peer(sk); 2244 if (!other || sk->sk_state != TCP_ESTABLISHED) 2245 return -ENOTCONN; 2246 2247 if (false) { 2248 alloc_skb: 2249 unix_state_unlock(other); 2250 mutex_unlock(&unix_sk(other)->iolock); 2251 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2252 &err, 0); 2253 if (!newskb) 2254 goto err; 2255 } 2256 2257 /* we must acquire iolock as we modify already present 2258 * skbs in the sk_receive_queue and mess with skb->len 2259 */ 2260 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2261 if (err) { 2262 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2263 goto err; 2264 } 2265 2266 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2267 err = -EPIPE; 2268 send_sigpipe = true; 2269 goto err_unlock; 2270 } 2271 2272 unix_state_lock(other); 2273 2274 if (sock_flag(other, SOCK_DEAD) || 2275 other->sk_shutdown & RCV_SHUTDOWN) { 2276 err = -EPIPE; 2277 send_sigpipe = true; 2278 goto err_state_unlock; 2279 } 2280 2281 if (init_scm) { 2282 err = maybe_init_creds(&scm, socket, other); 2283 if (err) 2284 goto err_state_unlock; 2285 init_scm = false; 2286 } 2287 2288 skb = skb_peek_tail(&other->sk_receive_queue); 2289 if (tail && tail == skb) { 2290 skb = newskb; 2291 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2292 if (newskb) { 2293 skb = newskb; 2294 } else { 2295 tail = skb; 2296 goto alloc_skb; 2297 } 2298 } else if (newskb) { 2299 /* this is fast path, we don't necessarily need to 2300 * call to kfree_skb even though with newskb == NULL 2301 * this - does no harm 2302 */ 2303 consume_skb(newskb); 2304 newskb = NULL; 2305 } 2306 2307 if (skb_append_pagefrags(skb, page, offset, size)) { 2308 tail = skb; 2309 goto alloc_skb; 2310 } 2311 2312 skb->len += size; 2313 skb->data_len += size; 2314 skb->truesize += size; 2315 refcount_add(size, &sk->sk_wmem_alloc); 2316 2317 if (newskb) { 2318 err = unix_scm_to_skb(&scm, skb, false); 2319 if (err) 2320 goto err_state_unlock; 2321 spin_lock(&other->sk_receive_queue.lock); 2322 __skb_queue_tail(&other->sk_receive_queue, newskb); 2323 spin_unlock(&other->sk_receive_queue.lock); 2324 } 2325 2326 unix_state_unlock(other); 2327 mutex_unlock(&unix_sk(other)->iolock); 2328 2329 other->sk_data_ready(other); 2330 scm_destroy(&scm); 2331 return size; 2332 2333 err_state_unlock: 2334 unix_state_unlock(other); 2335 err_unlock: 2336 mutex_unlock(&unix_sk(other)->iolock); 2337 err: 2338 kfree_skb(newskb); 2339 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2340 send_sig(SIGPIPE, current, 0); 2341 if (!init_scm) 2342 scm_destroy(&scm); 2343 return err; 2344 } 2345 2346 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2347 size_t len) 2348 { 2349 int err; 2350 struct sock *sk = sock->sk; 2351 2352 err = sock_error(sk); 2353 if (err) 2354 return err; 2355 2356 if (sk->sk_state != TCP_ESTABLISHED) 2357 return -ENOTCONN; 2358 2359 if (msg->msg_namelen) 2360 msg->msg_namelen = 0; 2361 2362 return unix_dgram_sendmsg(sock, msg, len); 2363 } 2364 2365 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2366 size_t size, int flags) 2367 { 2368 struct sock *sk = sock->sk; 2369 2370 if (sk->sk_state != TCP_ESTABLISHED) 2371 return -ENOTCONN; 2372 2373 return unix_dgram_recvmsg(sock, msg, size, flags); 2374 } 2375 2376 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2377 { 2378 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2379 2380 if (addr) { 2381 msg->msg_namelen = addr->len; 2382 memcpy(msg->msg_name, addr->name, addr->len); 2383 } 2384 } 2385 2386 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2387 int flags) 2388 { 2389 struct scm_cookie scm; 2390 struct socket *sock = sk->sk_socket; 2391 struct unix_sock *u = unix_sk(sk); 2392 struct sk_buff *skb, *last; 2393 long timeo; 2394 int skip; 2395 int err; 2396 2397 err = -EOPNOTSUPP; 2398 if (flags&MSG_OOB) 2399 goto out; 2400 2401 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2402 2403 do { 2404 mutex_lock(&u->iolock); 2405 2406 skip = sk_peek_offset(sk, flags); 2407 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2408 &skip, 
&err, &last); 2409 if (skb) { 2410 if (!(flags & MSG_PEEK)) 2411 scm_stat_del(sk, skb); 2412 break; 2413 } 2414 2415 mutex_unlock(&u->iolock); 2416 2417 if (err != -EAGAIN) 2418 break; 2419 } while (timeo && 2420 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2421 &err, &timeo, last)); 2422 2423 if (!skb) { /* implies iolock unlocked */ 2424 unix_state_lock(sk); 2425 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2426 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2427 (sk->sk_shutdown & RCV_SHUTDOWN)) 2428 err = 0; 2429 unix_state_unlock(sk); 2430 goto out; 2431 } 2432 2433 if (wq_has_sleeper(&u->peer_wait)) 2434 wake_up_interruptible_sync_poll(&u->peer_wait, 2435 EPOLLOUT | EPOLLWRNORM | 2436 EPOLLWRBAND); 2437 2438 if (msg->msg_name) 2439 unix_copy_addr(msg, skb->sk); 2440 2441 if (size > skb->len - skip) 2442 size = skb->len - skip; 2443 else if (size < skb->len - skip) 2444 msg->msg_flags |= MSG_TRUNC; 2445 2446 err = skb_copy_datagram_msg(skb, skip, msg, size); 2447 if (err) 2448 goto out_free; 2449 2450 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2451 __sock_recv_timestamp(msg, sk, skb); 2452 2453 memset(&scm, 0, sizeof(scm)); 2454 2455 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2456 unix_set_secdata(&scm, skb); 2457 2458 if (!(flags & MSG_PEEK)) { 2459 if (UNIXCB(skb).fp) 2460 unix_detach_fds(&scm, skb); 2461 2462 sk_peek_offset_bwd(sk, skb->len); 2463 } else { 2464 /* It is questionable: on PEEK we could: 2465 - do not return fds - good, but too simple 8) 2466 - return fds, and do not return them on read (old strategy, 2467 apparently wrong) 2468 - clone fds (I chose it for now, it is the most universal 2469 solution) 2470 2471 POSIX 1003.1g does not actually define this clearly 2472 at all. POSIX 1003.1g doesn't define a lot of things 2473 clearly however! 2474 2475 */ 2476 2477 sk_peek_offset_fwd(sk, size); 2478 2479 if (UNIXCB(skb).fp) 2480 unix_peek_fds(&scm, skb); 2481 } 2482 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2483 2484 scm_recv(sock, msg, &scm, flags); 2485 2486 out_free: 2487 skb_free_datagram(sk, skb); 2488 mutex_unlock(&u->iolock); 2489 out: 2490 return err; 2491 } 2492 2493 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2494 int flags) 2495 { 2496 struct sock *sk = sock->sk; 2497 2498 #ifdef CONFIG_BPF_SYSCALL 2499 const struct proto *prot = READ_ONCE(sk->sk_prot); 2500 2501 if (prot != &unix_dgram_proto) 2502 return prot->recvmsg(sk, msg, size, flags, NULL); 2503 #endif 2504 return __unix_dgram_recvmsg(sk, msg, size, flags); 2505 } 2506 2507 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2508 { 2509 int copied = 0; 2510 2511 while (1) { 2512 struct unix_sock *u = unix_sk(sk); 2513 struct sk_buff *skb; 2514 int used, err; 2515 2516 mutex_lock(&u->iolock); 2517 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2518 mutex_unlock(&u->iolock); 2519 if (!skb) 2520 return err; 2521 2522 used = recv_actor(sk, skb); 2523 if (used <= 0) { 2524 if (!copied) 2525 copied = used; 2526 kfree_skb(skb); 2527 break; 2528 } else if (used <= skb->len) { 2529 copied += used; 2530 } 2531 2532 kfree_skb(skb); 2533 break; 2534 } 2535 2536 return copied; 2537 } 2538 2539 /* 2540 * Sleep until more data has arrived. But check for races.. 
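 *
 * The races in question are data or state changes that may have arrived
 * since the caller last looked at the queue: while still holding
 * unix_state_lock we compare the tail of sk_receive_queue (and its
 * length, since the tail skb of a stream socket can grow in place) with
 * what the caller passed in, and also check sk_err, RCV_SHUTDOWN,
 * pending signals and the remaining timeout before actually scheduling.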
2541 */ 2542 static long unix_stream_data_wait(struct sock *sk, long timeo, 2543 struct sk_buff *last, unsigned int last_len, 2544 bool freezable) 2545 { 2546 struct sk_buff *tail; 2547 DEFINE_WAIT(wait); 2548 2549 unix_state_lock(sk); 2550 2551 for (;;) { 2552 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2553 2554 tail = skb_peek_tail(&sk->sk_receive_queue); 2555 if (tail != last || 2556 (tail && tail->len != last_len) || 2557 sk->sk_err || 2558 (sk->sk_shutdown & RCV_SHUTDOWN) || 2559 signal_pending(current) || 2560 !timeo) 2561 break; 2562 2563 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2564 unix_state_unlock(sk); 2565 if (freezable) 2566 timeo = freezable_schedule_timeout(timeo); 2567 else 2568 timeo = schedule_timeout(timeo); 2569 unix_state_lock(sk); 2570 2571 if (sock_flag(sk, SOCK_DEAD)) 2572 break; 2573 2574 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2575 } 2576 2577 finish_wait(sk_sleep(sk), &wait); 2578 unix_state_unlock(sk); 2579 return timeo; 2580 } 2581 2582 static unsigned int unix_skb_len(const struct sk_buff *skb) 2583 { 2584 return skb->len - UNIXCB(skb).consumed; 2585 } 2586 2587 struct unix_stream_read_state { 2588 int (*recv_actor)(struct sk_buff *, int, int, 2589 struct unix_stream_read_state *); 2590 struct socket *socket; 2591 struct msghdr *msg; 2592 struct pipe_inode_info *pipe; 2593 size_t size; 2594 int flags; 2595 unsigned int splice_flags; 2596 }; 2597 2598 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2599 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2600 { 2601 struct socket *sock = state->socket; 2602 struct sock *sk = sock->sk; 2603 struct unix_sock *u = unix_sk(sk); 2604 int chunk = 1; 2605 struct sk_buff *oob_skb; 2606 2607 mutex_lock(&u->iolock); 2608 unix_state_lock(sk); 2609 2610 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2611 unix_state_unlock(sk); 2612 mutex_unlock(&u->iolock); 2613 return -EINVAL; 2614 } 2615 2616 oob_skb = u->oob_skb; 2617 2618 if (!(state->flags & MSG_PEEK)) 2619 WRITE_ONCE(u->oob_skb, NULL); 2620 2621 unix_state_unlock(sk); 2622 2623 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2624 2625 if (!(state->flags & MSG_PEEK)) { 2626 UNIXCB(oob_skb).consumed += 1; 2627 kfree_skb(oob_skb); 2628 } 2629 2630 mutex_unlock(&u->iolock); 2631 2632 if (chunk < 0) 2633 return -EFAULT; 2634 2635 state->msg->msg_flags |= MSG_OOB; 2636 return 1; 2637 } 2638 2639 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2640 int flags, int copied) 2641 { 2642 struct unix_sock *u = unix_sk(sk); 2643 2644 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2645 skb_unlink(skb, &sk->sk_receive_queue); 2646 consume_skb(skb); 2647 skb = NULL; 2648 } else { 2649 if (skb == u->oob_skb) { 2650 if (copied) { 2651 skb = NULL; 2652 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2653 if (!(flags & MSG_PEEK)) { 2654 WRITE_ONCE(u->oob_skb, NULL); 2655 consume_skb(skb); 2656 } 2657 } else if (!(flags & MSG_PEEK)) { 2658 skb_unlink(skb, &sk->sk_receive_queue); 2659 consume_skb(skb); 2660 skb = skb_peek(&sk->sk_receive_queue); 2661 } 2662 } 2663 } 2664 return skb; 2665 } 2666 #endif 2667 2668 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2669 { 2670 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2671 return -ENOTCONN; 2672 2673 return unix_read_skb(sk, recv_actor); 2674 } 2675 2676 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2677 bool freezable) 2678 { 2679 struct scm_cookie scm; 2680 struct socket *sock = state->socket; 2681 struct sock *sk = sock->sk; 2682 
struct unix_sock *u = unix_sk(sk); 2683 int copied = 0; 2684 int flags = state->flags; 2685 int noblock = flags & MSG_DONTWAIT; 2686 bool check_creds = false; 2687 int target; 2688 int err = 0; 2689 long timeo; 2690 int skip; 2691 size_t size = state->size; 2692 unsigned int last_len; 2693 2694 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2695 err = -EINVAL; 2696 goto out; 2697 } 2698 2699 if (unlikely(flags & MSG_OOB)) { 2700 err = -EOPNOTSUPP; 2701 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2702 err = unix_stream_recv_urg(state); 2703 #endif 2704 goto out; 2705 } 2706 2707 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2708 timeo = sock_rcvtimeo(sk, noblock); 2709 2710 memset(&scm, 0, sizeof(scm)); 2711 2712 /* Lock the socket to prevent queue disordering 2713 * while sleeps in memcpy_tomsg 2714 */ 2715 mutex_lock(&u->iolock); 2716 2717 skip = max(sk_peek_offset(sk, flags), 0); 2718 2719 do { 2720 int chunk; 2721 bool drop_skb; 2722 struct sk_buff *skb, *last; 2723 2724 redo: 2725 unix_state_lock(sk); 2726 if (sock_flag(sk, SOCK_DEAD)) { 2727 err = -ECONNRESET; 2728 goto unlock; 2729 } 2730 last = skb = skb_peek(&sk->sk_receive_queue); 2731 last_len = last ? last->len : 0; 2732 2733 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2734 if (skb) { 2735 skb = manage_oob(skb, sk, flags, copied); 2736 if (!skb) { 2737 unix_state_unlock(sk); 2738 if (copied) 2739 break; 2740 goto redo; 2741 } 2742 } 2743 #endif 2744 again: 2745 if (skb == NULL) { 2746 if (copied >= target) 2747 goto unlock; 2748 2749 /* 2750 * POSIX 1003.1g mandates this order. 2751 */ 2752 2753 err = sock_error(sk); 2754 if (err) 2755 goto unlock; 2756 if (sk->sk_shutdown & RCV_SHUTDOWN) 2757 goto unlock; 2758 2759 unix_state_unlock(sk); 2760 if (!timeo) { 2761 err = -EAGAIN; 2762 break; 2763 } 2764 2765 mutex_unlock(&u->iolock); 2766 2767 timeo = unix_stream_data_wait(sk, timeo, last, 2768 last_len, freezable); 2769 2770 if (signal_pending(current)) { 2771 err = sock_intr_errno(timeo); 2772 scm_destroy(&scm); 2773 goto out; 2774 } 2775 2776 mutex_lock(&u->iolock); 2777 goto redo; 2778 unlock: 2779 unix_state_unlock(sk); 2780 break; 2781 } 2782 2783 while (skip >= unix_skb_len(skb)) { 2784 skip -= unix_skb_len(skb); 2785 last = skb; 2786 last_len = skb->len; 2787 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2788 if (!skb) 2789 goto again; 2790 } 2791 2792 unix_state_unlock(sk); 2793 2794 if (check_creds) { 2795 /* Never glue messages from different writers */ 2796 if (!unix_skb_scm_eq(skb, &scm)) 2797 break; 2798 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2799 /* Copy credentials */ 2800 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2801 unix_set_secdata(&scm, skb); 2802 check_creds = true; 2803 } 2804 2805 /* Copy address just once */ 2806 if (state->msg && state->msg->msg_name) { 2807 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2808 state->msg->msg_name); 2809 unix_copy_addr(state->msg, skb->sk); 2810 sunaddr = NULL; 2811 } 2812 2813 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2814 skb_get(skb); 2815 chunk = state->recv_actor(skb, skip, chunk, state); 2816 drop_skb = !unix_skb_len(skb); 2817 /* skb is only safe to use if !drop_skb */ 2818 consume_skb(skb); 2819 if (chunk < 0) { 2820 if (copied == 0) 2821 copied = -EFAULT; 2822 break; 2823 } 2824 copied += chunk; 2825 size -= chunk; 2826 2827 if (drop_skb) { 2828 /* the skb was touched by a concurrent reader; 2829 * we should not expect anything from this skb 2830 * anymore and assume it invalid - we can be 2831 * sure it was 
dropped from the socket queue 2832 * 2833 * let's report a short read 2834 */ 2835 err = 0; 2836 break; 2837 } 2838 2839 /* Mark read part of skb as used */ 2840 if (!(flags & MSG_PEEK)) { 2841 UNIXCB(skb).consumed += chunk; 2842 2843 sk_peek_offset_bwd(sk, chunk); 2844 2845 if (UNIXCB(skb).fp) { 2846 scm_stat_del(sk, skb); 2847 unix_detach_fds(&scm, skb); 2848 } 2849 2850 if (unix_skb_len(skb)) 2851 break; 2852 2853 skb_unlink(skb, &sk->sk_receive_queue); 2854 consume_skb(skb); 2855 2856 if (scm.fp) 2857 break; 2858 } else { 2859 /* It is questionable, see note in unix_dgram_recvmsg. 2860 */ 2861 if (UNIXCB(skb).fp) 2862 unix_peek_fds(&scm, skb); 2863 2864 sk_peek_offset_fwd(sk, chunk); 2865 2866 if (UNIXCB(skb).fp) 2867 break; 2868 2869 skip = 0; 2870 last = skb; 2871 last_len = skb->len; 2872 unix_state_lock(sk); 2873 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2874 if (skb) 2875 goto again; 2876 unix_state_unlock(sk); 2877 break; 2878 } 2879 } while (size); 2880 2881 mutex_unlock(&u->iolock); 2882 if (state->msg) 2883 scm_recv(sock, state->msg, &scm, flags); 2884 else 2885 scm_destroy(&scm); 2886 out: 2887 return copied ? : err; 2888 } 2889 2890 static int unix_stream_read_actor(struct sk_buff *skb, 2891 int skip, int chunk, 2892 struct unix_stream_read_state *state) 2893 { 2894 int ret; 2895 2896 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2897 state->msg, chunk); 2898 return ret ?: chunk; 2899 } 2900 2901 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2902 size_t size, int flags) 2903 { 2904 struct unix_stream_read_state state = { 2905 .recv_actor = unix_stream_read_actor, 2906 .socket = sk->sk_socket, 2907 .msg = msg, 2908 .size = size, 2909 .flags = flags 2910 }; 2911 2912 return unix_stream_read_generic(&state, true); 2913 } 2914 2915 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2916 size_t size, int flags) 2917 { 2918 struct unix_stream_read_state state = { 2919 .recv_actor = unix_stream_read_actor, 2920 .socket = sock, 2921 .msg = msg, 2922 .size = size, 2923 .flags = flags 2924 }; 2925 2926 #ifdef CONFIG_BPF_SYSCALL 2927 struct sock *sk = sock->sk; 2928 const struct proto *prot = READ_ONCE(sk->sk_prot); 2929 2930 if (prot != &unix_stream_proto) 2931 return prot->recvmsg(sk, msg, size, flags, NULL); 2932 #endif 2933 return unix_stream_read_generic(&state, true); 2934 } 2935 2936 static int unix_stream_splice_actor(struct sk_buff *skb, 2937 int skip, int chunk, 2938 struct unix_stream_read_state *state) 2939 { 2940 return skb_splice_bits(skb, state->socket->sk, 2941 UNIXCB(skb).consumed + skip, 2942 state->pipe, chunk, state->splice_flags); 2943 } 2944 2945 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2946 struct pipe_inode_info *pipe, 2947 size_t size, unsigned int flags) 2948 { 2949 struct unix_stream_read_state state = { 2950 .recv_actor = unix_stream_splice_actor, 2951 .socket = sock, 2952 .pipe = pipe, 2953 .size = size, 2954 .splice_flags = flags, 2955 }; 2956 2957 if (unlikely(*ppos)) 2958 return -ESPIPE; 2959 2960 if (sock->file->f_flags & O_NONBLOCK || 2961 flags & SPLICE_F_NONBLOCK) 2962 state.flags = MSG_DONTWAIT; 2963 2964 return unix_stream_read_generic(&state, false); 2965 } 2966 2967 static int unix_shutdown(struct socket *sock, int mode) 2968 { 2969 struct sock *sk = sock->sk; 2970 struct sock *other; 2971 2972 if (mode < SHUT_RD || mode > SHUT_RDWR) 2973 return -EINVAL; 2974 /* This maps: 2975 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2976 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 
2977 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2978 */ 2979 ++mode; 2980 2981 unix_state_lock(sk); 2982 sk->sk_shutdown |= mode; 2983 other = unix_peer(sk); 2984 if (other) 2985 sock_hold(other); 2986 unix_state_unlock(sk); 2987 sk->sk_state_change(sk); 2988 2989 if (other && 2990 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2991 2992 int peer_mode = 0; 2993 const struct proto *prot = READ_ONCE(other->sk_prot); 2994 2995 if (prot->unhash) 2996 prot->unhash(other); 2997 if (mode&RCV_SHUTDOWN) 2998 peer_mode |= SEND_SHUTDOWN; 2999 if (mode&SEND_SHUTDOWN) 3000 peer_mode |= RCV_SHUTDOWN; 3001 unix_state_lock(other); 3002 other->sk_shutdown |= peer_mode; 3003 unix_state_unlock(other); 3004 other->sk_state_change(other); 3005 if (peer_mode == SHUTDOWN_MASK) 3006 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 3007 else if (peer_mode & RCV_SHUTDOWN) 3008 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 3009 } 3010 if (other) 3011 sock_put(other); 3012 3013 return 0; 3014 } 3015 3016 long unix_inq_len(struct sock *sk) 3017 { 3018 struct sk_buff *skb; 3019 long amount = 0; 3020 3021 if (sk->sk_state == TCP_LISTEN) 3022 return -EINVAL; 3023 3024 spin_lock(&sk->sk_receive_queue.lock); 3025 if (sk->sk_type == SOCK_STREAM || 3026 sk->sk_type == SOCK_SEQPACKET) { 3027 skb_queue_walk(&sk->sk_receive_queue, skb) 3028 amount += unix_skb_len(skb); 3029 } else { 3030 skb = skb_peek(&sk->sk_receive_queue); 3031 if (skb) 3032 amount = skb->len; 3033 } 3034 spin_unlock(&sk->sk_receive_queue.lock); 3035 3036 return amount; 3037 } 3038 EXPORT_SYMBOL_GPL(unix_inq_len); 3039 3040 long unix_outq_len(struct sock *sk) 3041 { 3042 return sk_wmem_alloc_get(sk); 3043 } 3044 EXPORT_SYMBOL_GPL(unix_outq_len); 3045 3046 static int unix_open_file(struct sock *sk) 3047 { 3048 struct path path; 3049 struct file *f; 3050 int fd; 3051 3052 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3053 return -EPERM; 3054 3055 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3056 return -ENOENT; 3057 3058 path = unix_sk(sk)->path; 3059 if (!path.dentry) 3060 return -ENOENT; 3061 3062 path_get(&path); 3063 3064 fd = get_unused_fd_flags(O_CLOEXEC); 3065 if (fd < 0) 3066 goto out; 3067 3068 f = dentry_open(&path, O_PATH, current_cred()); 3069 if (IS_ERR(f)) { 3070 put_unused_fd(fd); 3071 fd = PTR_ERR(f); 3072 goto out; 3073 } 3074 3075 fd_install(fd, f); 3076 out: 3077 path_put(&path); 3078 3079 return fd; 3080 } 3081 3082 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3083 { 3084 struct sock *sk = sock->sk; 3085 long amount = 0; 3086 int err; 3087 3088 switch (cmd) { 3089 case SIOCOUTQ: 3090 amount = unix_outq_len(sk); 3091 err = put_user(amount, (int __user *)arg); 3092 break; 3093 case SIOCINQ: 3094 amount = unix_inq_len(sk); 3095 if (amount < 0) 3096 err = amount; 3097 else 3098 err = put_user(amount, (int __user *)arg); 3099 break; 3100 case SIOCUNIXFILE: 3101 err = unix_open_file(sk); 3102 break; 3103 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3104 case SIOCATMARK: 3105 { 3106 struct sk_buff *skb; 3107 int answ = 0; 3108 3109 skb = skb_peek(&sk->sk_receive_queue); 3110 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3111 answ = 1; 3112 err = put_user(answ, (int __user *)arg); 3113 } 3114 break; 3115 #endif 3116 default: 3117 err = -ENOIOCTLCMD; 3118 break; 3119 } 3120 return err; 3121 } 3122 3123 #ifdef CONFIG_COMPAT 3124 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3125 { 3126 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3127 } 3128 
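
/*
 * For 32-bit tasks on a 64-bit kernel the ioctl argument arrives as a
 * compat pointer, so it is widened with compat_ptr() and handed to the
 * native unix_ioctl() above.  A minimal userspace sketch of the queue
 * ioctls implemented there (illustrative only):
 *
 *	int unread, unsent;
 *	ioctl(fd, SIOCINQ, &unread);	// bytes waiting in the receive queue
 *	ioctl(fd, SIOCOUTQ, &unsent);	// bytes sent but not yet consumed
 */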
#endif 3129 3130 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3131 { 3132 struct sock *sk = sock->sk; 3133 __poll_t mask; 3134 3135 sock_poll_wait(file, sock, wait); 3136 mask = 0; 3137 3138 /* exceptional events? */ 3139 if (sk->sk_err) 3140 mask |= EPOLLERR; 3141 if (sk->sk_shutdown == SHUTDOWN_MASK) 3142 mask |= EPOLLHUP; 3143 if (sk->sk_shutdown & RCV_SHUTDOWN) 3144 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3145 3146 /* readable? */ 3147 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3148 mask |= EPOLLIN | EPOLLRDNORM; 3149 if (sk_is_readable(sk)) 3150 mask |= EPOLLIN | EPOLLRDNORM; 3151 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3152 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3153 mask |= EPOLLPRI; 3154 #endif 3155 3156 /* Connection-based need to check for termination and startup */ 3157 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3158 sk->sk_state == TCP_CLOSE) 3159 mask |= EPOLLHUP; 3160 3161 /* 3162 * we set writable also when the other side has shut down the 3163 * connection. This prevents stuck sockets. 3164 */ 3165 if (unix_writable(sk)) 3166 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3167 3168 return mask; 3169 } 3170 3171 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3172 poll_table *wait) 3173 { 3174 struct sock *sk = sock->sk, *other; 3175 unsigned int writable; 3176 __poll_t mask; 3177 3178 sock_poll_wait(file, sock, wait); 3179 mask = 0; 3180 3181 /* exceptional events? */ 3182 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3183 mask |= EPOLLERR | 3184 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3185 3186 if (sk->sk_shutdown & RCV_SHUTDOWN) 3187 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3188 if (sk->sk_shutdown == SHUTDOWN_MASK) 3189 mask |= EPOLLHUP; 3190 3191 /* readable? */ 3192 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3193 mask |= EPOLLIN | EPOLLRDNORM; 3194 if (sk_is_readable(sk)) 3195 mask |= EPOLLIN | EPOLLRDNORM; 3196 3197 /* Connection-based need to check for termination and startup */ 3198 if (sk->sk_type == SOCK_SEQPACKET) { 3199 if (sk->sk_state == TCP_CLOSE) 3200 mask |= EPOLLHUP; 3201 /* connection hasn't started yet? */ 3202 if (sk->sk_state == TCP_SYN_SENT) 3203 return mask; 3204 } 3205 3206 /* No write status requested, avoid expensive OUT tests. 
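	 * poll_requested_events() tells us whether the caller asked for
	 * writability at all; if not, we skip the peer receive-queue check
	 * and the peer-wake registration below.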
*/ 3207 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3208 return mask; 3209 3210 writable = unix_writable(sk); 3211 if (writable) { 3212 unix_state_lock(sk); 3213 3214 other = unix_peer(sk); 3215 if (other && unix_peer(other) != sk && 3216 unix_recvq_full_lockless(other) && 3217 unix_dgram_peer_wake_me(sk, other)) 3218 writable = 0; 3219 3220 unix_state_unlock(sk); 3221 } 3222 3223 if (writable) 3224 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3225 else 3226 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3227 3228 return mask; 3229 } 3230 3231 #ifdef CONFIG_PROC_FS 3232 3233 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3234 3235 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3236 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3237 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3238 3239 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3240 { 3241 unsigned long offset = get_offset(*pos); 3242 unsigned long bucket = get_bucket(*pos); 3243 unsigned long count = 0; 3244 struct sock *sk; 3245 3246 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3247 sk; sk = sk_next(sk)) { 3248 if (++count == offset) 3249 break; 3250 } 3251 3252 return sk; 3253 } 3254 3255 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3256 { 3257 unsigned long bucket = get_bucket(*pos); 3258 struct net *net = seq_file_net(seq); 3259 struct sock *sk; 3260 3261 while (bucket < UNIX_HASH_SIZE) { 3262 spin_lock(&net->unx.table.locks[bucket]); 3263 3264 sk = unix_from_bucket(seq, pos); 3265 if (sk) 3266 return sk; 3267 3268 spin_unlock(&net->unx.table.locks[bucket]); 3269 3270 *pos = set_bucket_offset(++bucket, 1); 3271 } 3272 3273 return NULL; 3274 } 3275 3276 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3277 loff_t *pos) 3278 { 3279 unsigned long bucket = get_bucket(*pos); 3280 3281 sk = sk_next(sk); 3282 if (sk) 3283 return sk; 3284 3285 3286 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3287 3288 *pos = set_bucket_offset(++bucket, 1); 3289 3290 return unix_get_first(seq, pos); 3291 } 3292 3293 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3294 { 3295 if (!*pos) 3296 return SEQ_START_TOKEN; 3297 3298 return unix_get_first(seq, pos); 3299 } 3300 3301 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3302 { 3303 ++*pos; 3304 3305 if (v == SEQ_START_TOKEN) 3306 return unix_get_first(seq, pos); 3307 3308 return unix_get_next(seq, v, pos); 3309 } 3310 3311 static void unix_seq_stop(struct seq_file *seq, void *v) 3312 { 3313 struct sock *sk = v; 3314 3315 if (sk) 3316 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3317 } 3318 3319 static int unix_seq_show(struct seq_file *seq, void *v) 3320 { 3321 3322 if (v == SEQ_START_TOKEN) 3323 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3324 "Inode Path\n"); 3325 else { 3326 struct sock *s = v; 3327 struct unix_sock *u = unix_sk(s); 3328 unix_state_lock(s); 3329 3330 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3331 s, 3332 refcount_read(&s->sk_refcnt), 3333 0, 3334 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3335 s->sk_type, 3336 s->sk_socket ? 3337 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3338 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3339 sock_i_ino(s)); 3340 3341 if (u->addr) { // under a hash table lock here 3342 int i, len; 3343 seq_putc(seq, ' '); 3344 3345 i = 0; 3346 len = u->addr->len - 3347 offsetof(struct sockaddr_un, sun_path); 3348 if (u->addr->name->sun_path[0]) { 3349 len--; 3350 } else { 3351 seq_putc(seq, '@'); 3352 i++; 3353 } 3354 for ( ; i < len; i++) 3355 seq_putc(seq, u->addr->name->sun_path[i] ?: 3356 '@'); 3357 } 3358 unix_state_unlock(s); 3359 seq_putc(seq, '\n'); 3360 } 3361 3362 return 0; 3363 } 3364 3365 static const struct seq_operations unix_seq_ops = { 3366 .start = unix_seq_start, 3367 .next = unix_seq_next, 3368 .stop = unix_seq_stop, 3369 .show = unix_seq_show, 3370 }; 3371 3372 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3373 struct bpf_unix_iter_state { 3374 struct seq_net_private p; 3375 unsigned int cur_sk; 3376 unsigned int end_sk; 3377 unsigned int max_sk; 3378 struct sock **batch; 3379 bool st_bucket_done; 3380 }; 3381 3382 struct bpf_iter__unix { 3383 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3384 __bpf_md_ptr(struct unix_sock *, unix_sk); 3385 uid_t uid __aligned(8); 3386 }; 3387 3388 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3389 struct unix_sock *unix_sk, uid_t uid) 3390 { 3391 struct bpf_iter__unix ctx; 3392 3393 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3394 ctx.meta = meta; 3395 ctx.unix_sk = unix_sk; 3396 ctx.uid = uid; 3397 return bpf_iter_run_prog(prog, &ctx); 3398 } 3399 3400 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3401 3402 { 3403 struct bpf_unix_iter_state *iter = seq->private; 3404 unsigned int expected = 1; 3405 struct sock *sk; 3406 3407 sock_hold(start_sk); 3408 iter->batch[iter->end_sk++] = start_sk; 3409 3410 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3411 if (iter->end_sk < iter->max_sk) { 3412 sock_hold(sk); 3413 iter->batch[iter->end_sk++] = sk; 3414 } 3415 3416 expected++; 3417 } 3418 3419 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3420 3421 return expected; 3422 } 3423 3424 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3425 { 3426 while (iter->cur_sk < iter->end_sk) 3427 sock_put(iter->batch[iter->cur_sk++]); 3428 } 3429 3430 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3431 unsigned int new_batch_sz) 3432 { 3433 struct sock **new_batch; 3434 3435 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3436 GFP_USER | __GFP_NOWARN); 3437 if (!new_batch) 3438 return -ENOMEM; 3439 3440 bpf_iter_unix_put_batch(iter); 3441 kvfree(iter->batch); 3442 iter->batch = new_batch; 3443 iter->max_sk = new_batch_sz; 3444 3445 return 0; 3446 } 3447 3448 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3449 loff_t *pos) 3450 { 3451 struct bpf_unix_iter_state *iter = seq->private; 3452 unsigned int expected; 3453 bool resized = false; 3454 struct sock *sk; 3455 3456 if (iter->st_bucket_done) 3457 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3458 3459 again: 3460 /* Get a new batch */ 3461 iter->cur_sk = 0; 3462 iter->end_sk = 0; 3463 3464 sk = unix_get_first(seq, pos); 3465 if (!sk) 3466 return NULL; /* Done */ 3467 3468 expected = bpf_iter_unix_hold_batch(seq, sk); 3469 3470 if (iter->end_sk == expected) { 3471 iter->st_bucket_done = true; 3472 return sk; 3473 } 3474 3475 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3476 resized = true; 3477 goto again; 3478 } 3479 3480 return sk; 3481 } 3482 3483 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3484 { 3485 if (!*pos) 3486 return SEQ_START_TOKEN; 3487 3488 /* bpf iter does not support lseek, so it always 3489 * continue from where it was stop()-ped. 3490 */ 3491 return bpf_iter_unix_batch(seq, pos); 3492 } 3493 3494 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3495 { 3496 struct bpf_unix_iter_state *iter = seq->private; 3497 struct sock *sk; 3498 3499 /* Whenever seq_next() is called, the iter->cur_sk is 3500 * done with seq_show(), so advance to the next sk in 3501 * the batch. 3502 */ 3503 if (iter->cur_sk < iter->end_sk) 3504 sock_put(iter->batch[iter->cur_sk++]); 3505 3506 ++*pos; 3507 3508 if (iter->cur_sk < iter->end_sk) 3509 sk = iter->batch[iter->cur_sk]; 3510 else 3511 sk = bpf_iter_unix_batch(seq, pos); 3512 3513 return sk; 3514 } 3515 3516 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3517 { 3518 struct bpf_iter_meta meta; 3519 struct bpf_prog *prog; 3520 struct sock *sk = v; 3521 uid_t uid; 3522 bool slow; 3523 int ret; 3524 3525 if (v == SEQ_START_TOKEN) 3526 return 0; 3527 3528 slow = lock_sock_fast(sk); 3529 3530 if (unlikely(sk_unhashed(sk))) { 3531 ret = SEQ_SKIP; 3532 goto unlock; 3533 } 3534 3535 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3536 meta.seq = seq; 3537 prog = bpf_iter_get_info(&meta, false); 3538 ret = unix_prog_seq_show(prog, &meta, v, uid); 3539 unlock: 3540 unlock_sock_fast(sk, slow); 3541 return ret; 3542 } 3543 3544 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3545 { 3546 struct bpf_unix_iter_state *iter = seq->private; 3547 struct bpf_iter_meta meta; 3548 struct bpf_prog *prog; 3549 3550 if (!v) { 3551 meta.seq = seq; 3552 prog = bpf_iter_get_info(&meta, true); 3553 if (prog) 3554 (void)unix_prog_seq_show(prog, &meta, v, 0); 3555 } 3556 3557 if (iter->cur_sk < iter->end_sk) 3558 bpf_iter_unix_put_batch(iter); 3559 } 3560 3561 static const struct seq_operations bpf_iter_unix_seq_ops = { 3562 .start = bpf_iter_unix_seq_start, 3563 .next = bpf_iter_unix_seq_next, 3564 .stop = bpf_iter_unix_seq_stop, 3565 .show = bpf_iter_unix_seq_show, 3566 }; 3567 #endif 3568 #endif 3569 3570 static const struct net_proto_family unix_family_ops = { 3571 .family = PF_UNIX, 3572 .create = unix_create, 3573 .owner = THIS_MODULE, 3574 }; 3575 3576 3577 static int __net_init unix_net_init(struct net *net) 3578 { 3579 int i; 3580 3581 net->unx.sysctl_max_dgram_qlen = 10; 3582 if (unix_sysctl_register(net)) 3583 goto out; 3584 3585 #ifdef CONFIG_PROC_FS 3586 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3587 sizeof(struct seq_net_private))) 3588 goto err_sysctl; 3589 #endif 3590 3591 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3592 sizeof(spinlock_t), GFP_KERNEL); 3593 if (!net->unx.table.locks) 3594 goto err_proc; 3595 3596 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3597 sizeof(struct hlist_head), 3598 GFP_KERNEL); 3599 if (!net->unx.table.buckets) 3600 goto free_locks; 3601 3602 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3603 spin_lock_init(&net->unx.table.locks[i]); 3604 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3605 } 3606 3607 return 0; 3608 3609 free_locks: 3610 kvfree(net->unx.table.locks); 3611 err_proc: 3612 #ifdef CONFIG_PROC_FS 3613 remove_proc_entry("unix", net->proc_net); 3614 err_sysctl: 3615 #endif 3616 unix_sysctl_unregister(net); 3617 out: 3618 return -ENOMEM; 3619 } 3620 3621 static void __net_exit unix_net_exit(struct net *net) 3622 { 3623 
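	/* Undo unix_net_init(): free the per-net hash buckets and locks,
	 * drop the sysctl table and remove the procfs "unix" entry.
	 */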
kvfree(net->unx.table.buckets); 3624 kvfree(net->unx.table.locks); 3625 unix_sysctl_unregister(net); 3626 remove_proc_entry("unix", net->proc_net); 3627 } 3628 3629 static struct pernet_operations unix_net_ops = { 3630 .init = unix_net_init, 3631 .exit = unix_net_exit, 3632 }; 3633 3634 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3635 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3636 struct unix_sock *unix_sk, uid_t uid) 3637 3638 #define INIT_BATCH_SZ 16 3639 3640 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3641 { 3642 struct bpf_unix_iter_state *iter = priv_data; 3643 int err; 3644 3645 err = bpf_iter_init_seq_net(priv_data, aux); 3646 if (err) 3647 return err; 3648 3649 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3650 if (err) { 3651 bpf_iter_fini_seq_net(priv_data); 3652 return err; 3653 } 3654 3655 return 0; 3656 } 3657 3658 static void bpf_iter_fini_unix(void *priv_data) 3659 { 3660 struct bpf_unix_iter_state *iter = priv_data; 3661 3662 bpf_iter_fini_seq_net(priv_data); 3663 kvfree(iter->batch); 3664 } 3665 3666 static const struct bpf_iter_seq_info unix_seq_info = { 3667 .seq_ops = &bpf_iter_unix_seq_ops, 3668 .init_seq_private = bpf_iter_init_unix, 3669 .fini_seq_private = bpf_iter_fini_unix, 3670 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3671 }; 3672 3673 static const struct bpf_func_proto * 3674 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3675 const struct bpf_prog *prog) 3676 { 3677 switch (func_id) { 3678 case BPF_FUNC_setsockopt: 3679 return &bpf_sk_setsockopt_proto; 3680 case BPF_FUNC_getsockopt: 3681 return &bpf_sk_getsockopt_proto; 3682 default: 3683 return NULL; 3684 } 3685 } 3686 3687 static struct bpf_iter_reg unix_reg_info = { 3688 .target = "unix", 3689 .ctx_arg_info_size = 1, 3690 .ctx_arg_info = { 3691 { offsetof(struct bpf_iter__unix, unix_sk), 3692 PTR_TO_BTF_ID_OR_NULL }, 3693 }, 3694 .get_func_proto = bpf_iter_unix_get_func_proto, 3695 .seq_info = &unix_seq_info, 3696 }; 3697 3698 static void __init bpf_iter_register(void) 3699 { 3700 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3701 if (bpf_iter_reg_target(&unix_reg_info)) 3702 pr_warn("Warning: could not register bpf iterator unix\n"); 3703 } 3704 #endif 3705 3706 static int __init af_unix_init(void) 3707 { 3708 int i, rc = -1; 3709 3710 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3711 3712 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3713 spin_lock_init(&bsd_socket_locks[i]); 3714 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3715 } 3716 3717 rc = proto_register(&unix_dgram_proto, 1); 3718 if (rc != 0) { 3719 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3720 goto out; 3721 } 3722 3723 rc = proto_register(&unix_stream_proto, 1); 3724 if (rc != 0) { 3725 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3726 goto out; 3727 } 3728 3729 sock_register(&unix_family_ops); 3730 register_pernet_subsys(&unix_net_ops); 3731 unix_bpf_build_proto(); 3732 3733 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3734 bpf_iter_register(); 3735 #endif 3736 3737 out: 3738 return rc; 3739 } 3740 3741 static void __exit af_unix_exit(void) 3742 { 3743 sock_unregister(PF_UNIX); 3744 proto_unregister(&unix_dgram_proto); 3745 proto_unregister(&unix_stream_proto); 3746 unregister_pernet_subsys(&unix_net_ops); 3747 } 3748 3749 /* Earlier than device_initcall() so that other drivers invoking 3750 
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
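
/*
 * MODULE_ALIAS_NETPROTO(PF_UNIX) registers the "net-pf-1" module alias,
 * so that on a modular build the socket core can load this module on
 * demand the first time userspace calls socket(AF_UNIX, ...).
 */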