// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *	Linus Torvalds		: Assorted bug cures.
 *	Niibe Yutaka		: async I/O support.
 *	Carsten Paeth		: PF_UNIX check, address fixes.
 *	Alan Cox		: Limit size of allocated blocks.
 *	Alan Cox		: Fixed the stupid socketpair bug.
 *	Alan Cox		: BSD compatibility fine tuning.
 *	Alan Cox		: Fixed a bug in connect when interrupted.
 *	Alan Cox		: Sorted out a proper draft version of
 *				  file descriptor passing hacked up from
 *				  Mike Shaver's work.
 *	Marty Leisner		: Fixes to fd passing
 *	Nick Nevin		: recvmsg bugfix.
 *	Alan Cox		: Started proper garbage collector
 *	Heiko Eißfeldt		: Missing verify_area check
 *	Alan Cox		: Started POSIXisms
 *	Andreas Schwab		: Replace inode by dentry for proper
 *				  reference counting
 *	Kirk Petersen		: Made this a module
 *	Christoph Rohland	: Elegant non-blocking accept/connect algorithm.
 *				  Lots of bug fixes.
 *	Alexey Kuznetsov	: Repaired (I hope) bugs introduced
 *				  by the above two patches.
 *	Andrea Arcangeli	: If possible we block in connect(2)
 *				  if the max backlog of the listen socket
 *				  has been reached. This won't break
 *				  old apps and it will avoid huge amounts
 *				  of socks hashed (this is for unix_gc()
 *				  performance reasons).
 *				  Security fix that limits the max
 *				  number of socks to 2*max_files and
 *				  the number of skbs queueable in the
 *				  dgram receiver.
 *	Artur Skawina		: Hash function optimizations
 *	Alexey Kuznetsov	: Full scale SMP. Lots of bugs are introduced 8)
 *	Malcolm Beattie		: Set peercred for socketpair
 *	Michal Ostrowski	: Module initialization cleanup.
 *	Arnaldo C. Melo		: Remove MOD_{INC,DEC}_USE_COUNT,
 *				  the core infrastructure is doing that
 *				  for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
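 *
 *		  As a minimal user-space sketch (illustrative only; the name
 *		  "example" and the variable names are placeholders), binding
 *		  such an abstract name looks like:
 *
 *			struct sockaddr_un a = { .sun_family = AF_UNIX };
 *			int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *			a.sun_path[0] = 0;	      (leading 0 = abstract)
 *			memcpy(a.sun_path + 1, "example", 7);
 *			bind(fd, (struct sockaddr *)&a,
 *			     offsetof(struct sockaddr_un, sun_path) + 1 + 7);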
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/splice.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it must be NUL
 *		  terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}

static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	/* This may look like an off by one error but it is a bit more
	 * subtle. 108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist. However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer to struct sockaddr_storage, which has a bigger buffer
	 * than 108 bytes.
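	 *
	 * (Concretely: addr_len is at most sizeof(struct sockaddr_un),
	 * i.e. 110 bytes on Linux, while the address the syscall layer
	 * hands us lives in a 128-byte struct sockaddr_storage, so the
	 * store below at index addr_len always stays inside that buffer.)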
	 */
	((char *)sunaddr)[addr_len] = 0;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far.
 * This can't be accomplished via poll_wait because the lifetime of the
 * server socket might be less than that of its clients if these break
 * their association with it or if the server socket is closed while
 * clients are still connected to it and there's no way to inform "a
 * polling implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake up
 * was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
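	 *
	 * (Descriptive note, not in the original comment: a non-zero
	 * return tells the caller in poll/sendmsg that it should keep
	 * waiting while staying registered on other's peer_wait queue;
	 * a zero return also unhooks the wait queue entry again.)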
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			WRITE_ONCE(other->sk_err, ECONNRESET);
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();	/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog = backlog;
	sk->sk_state = TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}

#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_stream_connect,
	.socketpair = unix_socketpair,
	.accept = unix_accept,
	.getname = unix_getname,
	.poll = unix_poll,
	.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen = unix_listen,
	.shutdown = unix_shutdown,
	.sendmsg = unix_stream_sendmsg,
	.recvmsg = unix_stream_recvmsg,
	.read_skb = unix_stream_read_skb,
	.mmap = sock_no_mmap,
	.sendpage = unix_stream_sendpage,
	.splice_read = unix_stream_splice_read,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo = unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_dgram_connect,
	.socketpair = unix_socketpair,
	.accept = sock_no_accept,
	.getname = unix_getname,
	.poll = unix_dgram_poll,
	.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen = sock_no_listen,
	.shutdown = unix_shutdown,
	.sendmsg = unix_dgram_sendmsg,
	.read_skb = unix_read_skb,
	.recvmsg = unix_dgram_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo = unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_stream_connect,
	.socketpair = unix_socketpair,
	.accept = unix_accept,
	.getname = unix_getname,
	.poll = unix_dgram_poll,
	.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen = unix_listen,
	.shutdown = unix_shutdown,
	.sendmsg = unix_seqpacket_sendmsg,
	.recvmsg = unix_seqpacket_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
	.set_peek_off = unix_set_peek_off,
	.show_fdinfo = unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
	if (level == SOL_SOCKET) {
		switch (optname) {
		case SO_PEERPIDFD:
			return true;
		default:
			return false;
		}
	}

	return false;
}

struct proto unix_dgram_proto = {
	.name = "UNIX",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
	.close = unix_close,
	.bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot = unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
	.name = "UNIX-STREAM",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
	.close = unix_close,
	.unhash = unix_unhash,
	.bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot = unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct unix_sock *u;
	struct sock *sk;
	int err;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
		err = -ENFILE;
		goto err;
	}

	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /* dgram and seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk) {
		err = -ENOMEM;
		goto err;
	}

	sock_init_data(sock, sk);

	sk->sk_hash = unix_unbound_hash(sk);
	sk->sk_allocation = GFP_KERNEL_ACCOUNT;
	sk->sk_write_space = unix_write_space;
	sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct = unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_unbound_socket(net, sk);

	sock_prot_inuse_add(net, sk->sk_prot, 1);

	return sk;

err:
	atomic_long_dec(&unix_nr_socks);
	return ERR_PTR(err);
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
	/*
	 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
	 *	nothing uses it.
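	 *	(So socket(AF_UNIX, SOCK_RAW, 0) is accepted here and, as the
	 *	fallthrough below shows, is silently treated as SOCK_DGRAM.)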
	 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}

static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}

static int unix_autobind(struct sock *sk)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	ordernum = get_random_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seem to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct mnt_idmap *idmap;
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	addr_len = strlen(sunaddr->sun_path) +
		offsetof(struct sockaddr_un, sun_path) + 1;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	idmap = mnt_idmap(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out_path;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod? unlink what we'd created... */
	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
	done_path_create(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}

static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
			      int addr_len)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	int err;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out;

	if (u->addr) {
		err = -EINVAL;
		goto out_mutex;
	}

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
		goto out_spin;

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	mutex_unlock(&u->bindlock);
	return 0;

out_spin:
	unix_table_double_unlock(net, old_hash, new_hash);
	err = -EADDRINUSE;
out_mutex:
	mutex_unlock(&u->bindlock);
out:
	unix_release_addr(addr);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
		    !unix_sk(sk)->addr) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry.
		 */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			sk->sk_state = TCP_CLOSE;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	long timeo;
	int err;
	int st;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	 * If we do this after the state is locked,
	 * we will have to recheck everything again in any case.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry.
	 */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.
	 *
	 * It is a tricky place. We need to grab our state lock and cannot
	 * drop the lock on the peer. It is dangerous because a deadlock is
	 * possible. Connect-to-self and simultaneous connect attempts are
	 * eliminated by checking the socket state: other is TCP_LISTEN, and
	 * if sk is TCP_LISTEN we check for that before attempting to grab
	 * the lock.
	 *
	 * Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock. Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	ska->sk_state = TCP_ESTABLISHED;
	skb->sk_state = TCP_ESTABLISHED;
	socka->state = SS_CONNECTED;
	sockb->state = SS_CONNECTED;
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSPIDFD, &old->flags))
		set_bit(SOCK_PASSPIDFD, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				&err);
	if (!skb) {
		/* This means receive shutdown.
		 */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = offsetof(struct sockaddr_un, sun_path);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeueing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 *     which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}

/*
 *	Send AF_UNIX data.
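 *
 *	As a minimal user-space sketch (illustrative only; sock_fd and
 *	fd_to_pass are placeholders) of the fd-passing path handled by the
 *	scm helpers above and the sendmsg routines below, a datagram that
 *	carries one descriptor is built roughly like this:
 *
 *		char cbuf[CMSG_SPACE(sizeof(int))];
 *		struct msghdr msg = { .msg_control = cbuf,
 *				      .msg_controllen = sizeof(cbuf) };
 *		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *
 *		cm->cmsg_level = SOL_SOCKET;
 *		cm->cmsg_type = SCM_RIGHTS;
 *		cm->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *		sendmsg(sock_fd, &msg, 0);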
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *sk = sock->sk, *other = NULL;
	struct unix_sock *u = unix_sk(sk);
	struct scm_cookie scm;
	struct sk_buff *skb;
	int data_len = 0;
	int sk_locked;
	long timeo;
	int err;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (sk->sk_type == SOCK_SEQPACKET) {
			/* We are here only when racing with unix_release_sock(),
			 * which is clearing @other. Never change the state to
			 * TCP_CLOSE, unlike what SOCK_DGRAM wants.
2012 */ 2013 unix_state_unlock(sk); 2014 err = -EPIPE; 2015 } else if (unix_peer(sk) == other) { 2016 unix_peer(sk) = NULL; 2017 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2018 2019 sk->sk_state = TCP_CLOSE; 2020 unix_state_unlock(sk); 2021 2022 unix_dgram_disconnected(sk, other); 2023 sock_put(other); 2024 err = -ECONNREFUSED; 2025 } else { 2026 unix_state_unlock(sk); 2027 } 2028 2029 other = NULL; 2030 if (err) 2031 goto out_free; 2032 goto restart; 2033 } 2034 2035 err = -EPIPE; 2036 if (other->sk_shutdown & RCV_SHUTDOWN) 2037 goto out_unlock; 2038 2039 if (sk->sk_type != SOCK_SEQPACKET) { 2040 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2041 if (err) 2042 goto out_unlock; 2043 } 2044 2045 /* other == sk && unix_peer(other) != sk if 2046 * - unix_peer(sk) == NULL, destination address bound to sk 2047 * - unix_peer(sk) == sk by time of get but disconnected before lock 2048 */ 2049 if (other != sk && 2050 unlikely(unix_peer(other) != sk && 2051 unix_recvq_full_lockless(other))) { 2052 if (timeo) { 2053 timeo = unix_wait_for_peer(other, timeo); 2054 2055 err = sock_intr_errno(timeo); 2056 if (signal_pending(current)) 2057 goto out_free; 2058 2059 goto restart; 2060 } 2061 2062 if (!sk_locked) { 2063 unix_state_unlock(other); 2064 unix_state_double_lock(sk, other); 2065 } 2066 2067 if (unix_peer(sk) != other || 2068 unix_dgram_peer_wake_me(sk, other)) { 2069 err = -EAGAIN; 2070 sk_locked = 1; 2071 goto out_unlock; 2072 } 2073 2074 if (!sk_locked) { 2075 sk_locked = 1; 2076 goto restart_locked; 2077 } 2078 } 2079 2080 if (unlikely(sk_locked)) 2081 unix_state_unlock(sk); 2082 2083 if (sock_flag(other, SOCK_RCVTSTAMP)) 2084 __net_timestamp(skb); 2085 maybe_add_creds(skb, sock, other); 2086 scm_stat_add(other, skb); 2087 skb_queue_tail(&other->sk_receive_queue, skb); 2088 unix_state_unlock(other); 2089 other->sk_data_ready(other); 2090 sock_put(other); 2091 scm_destroy(&scm); 2092 return len; 2093 2094 out_unlock: 2095 if (sk_locked) 2096 unix_state_unlock(sk); 2097 unix_state_unlock(other); 2098 out_free: 2099 kfree_skb(skb); 2100 out: 2101 if (other) 2102 sock_put(other); 2103 scm_destroy(&scm); 2104 return err; 2105 } 2106 2107 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2108 * bytes, and a minimum of a full page. 
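* For example, with 4 KiB pages get_order(32768) is 3, so
* UNIX_SKB_FRAGS_SZ below evaluates to 32 KiB; with 64 KiB pages the
* order is 0 and the limit degenerates to a single page.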
2109 */ 2110 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2111 2112 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2113 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, 2114 struct scm_cookie *scm, bool fds_sent) 2115 { 2116 struct unix_sock *ousk = unix_sk(other); 2117 struct sk_buff *skb; 2118 int err = 0; 2119 2120 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2121 2122 if (!skb) 2123 return err; 2124 2125 err = unix_scm_to_skb(scm, skb, !fds_sent); 2126 if (err < 0) { 2127 kfree_skb(skb); 2128 return err; 2129 } 2130 skb_put(skb, 1); 2131 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2132 2133 if (err) { 2134 kfree_skb(skb); 2135 return err; 2136 } 2137 2138 unix_state_lock(other); 2139 2140 if (sock_flag(other, SOCK_DEAD) || 2141 (other->sk_shutdown & RCV_SHUTDOWN)) { 2142 unix_state_unlock(other); 2143 kfree_skb(skb); 2144 return -EPIPE; 2145 } 2146 2147 maybe_add_creds(skb, sock, other); 2148 skb_get(skb); 2149 2150 if (ousk->oob_skb) 2151 consume_skb(ousk->oob_skb); 2152 2153 WRITE_ONCE(ousk->oob_skb, skb); 2154 2155 scm_stat_add(other, skb); 2156 skb_queue_tail(&other->sk_receive_queue, skb); 2157 sk_send_sigurg(other); 2158 unix_state_unlock(other); 2159 other->sk_data_ready(other); 2160 2161 return err; 2162 } 2163 #endif 2164 2165 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2166 size_t len) 2167 { 2168 struct sock *sk = sock->sk; 2169 struct sock *other = NULL; 2170 int err, size; 2171 struct sk_buff *skb; 2172 int sent = 0; 2173 struct scm_cookie scm; 2174 bool fds_sent = false; 2175 int data_len; 2176 2177 wait_for_unix_gc(); 2178 err = scm_send(sock, msg, &scm, false); 2179 if (err < 0) 2180 return err; 2181 2182 err = -EOPNOTSUPP; 2183 if (msg->msg_flags & MSG_OOB) { 2184 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2185 if (len) 2186 len--; 2187 else 2188 #endif 2189 goto out_err; 2190 } 2191 2192 if (msg->msg_namelen) { 2193 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2194 goto out_err; 2195 } else { 2196 err = -ENOTCONN; 2197 other = unix_peer(sk); 2198 if (!other) 2199 goto out_err; 2200 } 2201 2202 if (sk->sk_shutdown & SEND_SHUTDOWN) 2203 goto pipe_err; 2204 2205 while (sent < len) { 2206 size = len - sent; 2207 2208 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2209 skb = sock_alloc_send_pskb(sk, 0, 0, 2210 msg->msg_flags & MSG_DONTWAIT, 2211 &err, 0); 2212 } else { 2213 /* Keep two messages in the pipe so it schedules better */ 2214 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2215 2216 /* allow fallback to order-0 allocations */ 2217 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2218 2219 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2220 2221 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2222 2223 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2224 msg->msg_flags & MSG_DONTWAIT, &err, 2225 get_order(UNIX_SKB_FRAGS_SZ)); 2226 } 2227 if (!skb) 2228 goto out_err; 2229 2230 /* Only send the fds in the first buffer */ 2231 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2232 if (err < 0) { 2233 kfree_skb(skb); 2234 goto out_err; 2235 } 2236 fds_sent = true; 2237 2238 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2239 err = skb_splice_from_iter(skb, &msg->msg_iter, size, 2240 sk->sk_allocation); 2241 if (err < 0) { 2242 kfree_skb(skb); 2243 goto out_err; 2244 } 2245 size = err; 2246 refcount_add(size, &sk->sk_wmem_alloc); 2247 } else { 2248 skb_put(skb, size - data_len); 2249 skb->data_len = data_len; 2250 skb->len = size; 2251 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2252 if (err) { 2253 kfree_skb(skb); 2254 goto out_err; 2255 } 2256 } 2257 2258 unix_state_lock(other); 2259 2260 if (sock_flag(other, SOCK_DEAD) || 2261 (other->sk_shutdown & RCV_SHUTDOWN)) 2262 goto pipe_err_free; 2263 2264 maybe_add_creds(skb, sock, other); 2265 scm_stat_add(other, skb); 2266 skb_queue_tail(&other->sk_receive_queue, skb); 2267 unix_state_unlock(other); 2268 other->sk_data_ready(other); 2269 sent += size; 2270 } 2271 2272 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2273 if (msg->msg_flags & MSG_OOB) { 2274 err = queue_oob(sock, msg, other, &scm, fds_sent); 2275 if (err) 2276 goto out_err; 2277 sent++; 2278 } 2279 #endif 2280 2281 scm_destroy(&scm); 2282 2283 return sent; 2284 2285 pipe_err_free: 2286 unix_state_unlock(other); 2287 kfree_skb(skb); 2288 pipe_err: 2289 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2290 send_sig(SIGPIPE, current, 0); 2291 err = -EPIPE; 2292 out_err: 2293 scm_destroy(&scm); 2294 return sent ? 
: err; 2295 } 2296 2297 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2298 int offset, size_t size, int flags) 2299 { 2300 struct bio_vec bvec; 2301 struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES }; 2302 2303 if (flags & MSG_SENDPAGE_NOTLAST) 2304 msg.msg_flags |= MSG_MORE; 2305 2306 bvec_set_page(&bvec, page, size, offset); 2307 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); 2308 return unix_stream_sendmsg(socket, &msg, size); 2309 } 2310 2311 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2312 size_t len) 2313 { 2314 int err; 2315 struct sock *sk = sock->sk; 2316 2317 err = sock_error(sk); 2318 if (err) 2319 return err; 2320 2321 if (sk->sk_state != TCP_ESTABLISHED) 2322 return -ENOTCONN; 2323 2324 if (msg->msg_namelen) 2325 msg->msg_namelen = 0; 2326 2327 return unix_dgram_sendmsg(sock, msg, len); 2328 } 2329 2330 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2331 size_t size, int flags) 2332 { 2333 struct sock *sk = sock->sk; 2334 2335 if (sk->sk_state != TCP_ESTABLISHED) 2336 return -ENOTCONN; 2337 2338 return unix_dgram_recvmsg(sock, msg, size, flags); 2339 } 2340 2341 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2342 { 2343 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2344 2345 if (addr) { 2346 msg->msg_namelen = addr->len; 2347 memcpy(msg->msg_name, addr->name, addr->len); 2348 } 2349 } 2350 2351 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2352 int flags) 2353 { 2354 struct scm_cookie scm; 2355 struct socket *sock = sk->sk_socket; 2356 struct unix_sock *u = unix_sk(sk); 2357 struct sk_buff *skb, *last; 2358 long timeo; 2359 int skip; 2360 int err; 2361 2362 err = -EOPNOTSUPP; 2363 if (flags&MSG_OOB) 2364 goto out; 2365 2366 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2367 2368 do { 2369 mutex_lock(&u->iolock); 2370 2371 skip = sk_peek_offset(sk, flags); 2372 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2373 &skip, &err, &last); 2374 if (skb) { 2375 if (!(flags & MSG_PEEK)) 2376 scm_stat_del(sk, skb); 2377 break; 2378 } 2379 2380 mutex_unlock(&u->iolock); 2381 2382 if (err != -EAGAIN) 2383 break; 2384 } while (timeo && 2385 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2386 &err, &timeo, last)); 2387 2388 if (!skb) { /* implies iolock unlocked */ 2389 unix_state_lock(sk); 2390 /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
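* i.e. once the peer has shut the connection down (RCV_SHUTDOWN is set),
* a non-blocking recv() that would otherwise fail with -EAGAIN reports
* end of file (a 0 return) instead.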
*/ 2391 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2392 (sk->sk_shutdown & RCV_SHUTDOWN)) 2393 err = 0; 2394 unix_state_unlock(sk); 2395 goto out; 2396 } 2397 2398 if (wq_has_sleeper(&u->peer_wait)) 2399 wake_up_interruptible_sync_poll(&u->peer_wait, 2400 EPOLLOUT | EPOLLWRNORM | 2401 EPOLLWRBAND); 2402 2403 if (msg->msg_name) 2404 unix_copy_addr(msg, skb->sk); 2405 2406 if (size > skb->len - skip) 2407 size = skb->len - skip; 2408 else if (size < skb->len - skip) 2409 msg->msg_flags |= MSG_TRUNC; 2410 2411 err = skb_copy_datagram_msg(skb, skip, msg, size); 2412 if (err) 2413 goto out_free; 2414 2415 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2416 __sock_recv_timestamp(msg, sk, skb); 2417 2418 memset(&scm, 0, sizeof(scm)); 2419 2420 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2421 unix_set_secdata(&scm, skb); 2422 2423 if (!(flags & MSG_PEEK)) { 2424 if (UNIXCB(skb).fp) 2425 unix_detach_fds(&scm, skb); 2426 2427 sk_peek_offset_bwd(sk, skb->len); 2428 } else { 2429 /* It is questionable: on PEEK we could: 2430 - do not return fds - good, but too simple 8) 2431 - return fds, and do not return them on read (old strategy, 2432 apparently wrong) 2433 - clone fds (I chose it for now, it is the most universal 2434 solution) 2435 2436 POSIX 1003.1g does not actually define this clearly 2437 at all. POSIX 1003.1g doesn't define a lot of things 2438 clearly however! 2439 2440 */ 2441 2442 sk_peek_offset_fwd(sk, size); 2443 2444 if (UNIXCB(skb).fp) 2445 unix_peek_fds(&scm, skb); 2446 } 2447 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2448 2449 scm_recv(sock, msg, &scm, flags); 2450 2451 out_free: 2452 skb_free_datagram(sk, skb); 2453 mutex_unlock(&u->iolock); 2454 out: 2455 return err; 2456 } 2457 2458 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2459 int flags) 2460 { 2461 struct sock *sk = sock->sk; 2462 2463 #ifdef CONFIG_BPF_SYSCALL 2464 const struct proto *prot = READ_ONCE(sk->sk_prot); 2465 2466 if (prot != &unix_dgram_proto) 2467 return prot->recvmsg(sk, msg, size, flags, NULL); 2468 #endif 2469 return __unix_dgram_recvmsg(sk, msg, size, flags); 2470 } 2471 2472 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2473 { 2474 struct unix_sock *u = unix_sk(sk); 2475 struct sk_buff *skb; 2476 int err; 2477 2478 mutex_lock(&u->iolock); 2479 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2480 mutex_unlock(&u->iolock); 2481 if (!skb) 2482 return err; 2483 2484 return recv_actor(sk, skb); 2485 } 2486 2487 /* 2488 * Sleep until more data has arrived. But check for races.. 
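* Concretely, unix_stream_data_wait() re-checks under the socket state
* lock whether the tail of the receive queue changed or grew, whether
* sk_err or RCV_SHUTDOWN was set, and whether a signal is pending or the
* timeout expired, before it goes back to sleep.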
2489 */ 2490 static long unix_stream_data_wait(struct sock *sk, long timeo, 2491 struct sk_buff *last, unsigned int last_len, 2492 bool freezable) 2493 { 2494 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; 2495 struct sk_buff *tail; 2496 DEFINE_WAIT(wait); 2497 2498 unix_state_lock(sk); 2499 2500 for (;;) { 2501 prepare_to_wait(sk_sleep(sk), &wait, state); 2502 2503 tail = skb_peek_tail(&sk->sk_receive_queue); 2504 if (tail != last || 2505 (tail && tail->len != last_len) || 2506 sk->sk_err || 2507 (sk->sk_shutdown & RCV_SHUTDOWN) || 2508 signal_pending(current) || 2509 !timeo) 2510 break; 2511 2512 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2513 unix_state_unlock(sk); 2514 timeo = schedule_timeout(timeo); 2515 unix_state_lock(sk); 2516 2517 if (sock_flag(sk, SOCK_DEAD)) 2518 break; 2519 2520 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2521 } 2522 2523 finish_wait(sk_sleep(sk), &wait); 2524 unix_state_unlock(sk); 2525 return timeo; 2526 } 2527 2528 static unsigned int unix_skb_len(const struct sk_buff *skb) 2529 { 2530 return skb->len - UNIXCB(skb).consumed; 2531 } 2532 2533 struct unix_stream_read_state { 2534 int (*recv_actor)(struct sk_buff *, int, int, 2535 struct unix_stream_read_state *); 2536 struct socket *socket; 2537 struct msghdr *msg; 2538 struct pipe_inode_info *pipe; 2539 size_t size; 2540 int flags; 2541 unsigned int splice_flags; 2542 }; 2543 2544 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2545 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2546 { 2547 struct socket *sock = state->socket; 2548 struct sock *sk = sock->sk; 2549 struct unix_sock *u = unix_sk(sk); 2550 int chunk = 1; 2551 struct sk_buff *oob_skb; 2552 2553 mutex_lock(&u->iolock); 2554 unix_state_lock(sk); 2555 2556 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2557 unix_state_unlock(sk); 2558 mutex_unlock(&u->iolock); 2559 return -EINVAL; 2560 } 2561 2562 oob_skb = u->oob_skb; 2563 2564 if (!(state->flags & MSG_PEEK)) 2565 WRITE_ONCE(u->oob_skb, NULL); 2566 2567 unix_state_unlock(sk); 2568 2569 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2570 2571 if (!(state->flags & MSG_PEEK)) { 2572 UNIXCB(oob_skb).consumed += 1; 2573 kfree_skb(oob_skb); 2574 } 2575 2576 mutex_unlock(&u->iolock); 2577 2578 if (chunk < 0) 2579 return -EFAULT; 2580 2581 state->msg->msg_flags |= MSG_OOB; 2582 return 1; 2583 } 2584 2585 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2586 int flags, int copied) 2587 { 2588 struct unix_sock *u = unix_sk(sk); 2589 2590 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2591 skb_unlink(skb, &sk->sk_receive_queue); 2592 consume_skb(skb); 2593 skb = NULL; 2594 } else { 2595 if (skb == u->oob_skb) { 2596 if (copied) { 2597 skb = NULL; 2598 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2599 if (!(flags & MSG_PEEK)) { 2600 WRITE_ONCE(u->oob_skb, NULL); 2601 consume_skb(skb); 2602 } 2603 } else if (!(flags & MSG_PEEK)) { 2604 skb_unlink(skb, &sk->sk_receive_queue); 2605 consume_skb(skb); 2606 skb = skb_peek(&sk->sk_receive_queue); 2607 } 2608 } 2609 } 2610 return skb; 2611 } 2612 #endif 2613 2614 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2615 { 2616 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2617 return -ENOTCONN; 2618 2619 return unix_read_skb(sk, recv_actor); 2620 } 2621 2622 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2623 bool freezable) 2624 { 2625 struct scm_cookie scm; 2626 struct socket *sock = state->socket; 2627 struct sock *sk = sock->sk; 2628 struct unix_sock 
*u = unix_sk(sk); 2629 int copied = 0; 2630 int flags = state->flags; 2631 int noblock = flags & MSG_DONTWAIT; 2632 bool check_creds = false; 2633 int target; 2634 int err = 0; 2635 long timeo; 2636 int skip; 2637 size_t size = state->size; 2638 unsigned int last_len; 2639 2640 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2641 err = -EINVAL; 2642 goto out; 2643 } 2644 2645 if (unlikely(flags & MSG_OOB)) { 2646 err = -EOPNOTSUPP; 2647 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2648 err = unix_stream_recv_urg(state); 2649 #endif 2650 goto out; 2651 } 2652 2653 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2654 timeo = sock_rcvtimeo(sk, noblock); 2655 2656 memset(&scm, 0, sizeof(scm)); 2657 2658 /* Lock the socket to prevent queue disordering 2659 * while sleeps in memcpy_tomsg 2660 */ 2661 mutex_lock(&u->iolock); 2662 2663 skip = max(sk_peek_offset(sk, flags), 0); 2664 2665 do { 2666 int chunk; 2667 bool drop_skb; 2668 struct sk_buff *skb, *last; 2669 2670 redo: 2671 unix_state_lock(sk); 2672 if (sock_flag(sk, SOCK_DEAD)) { 2673 err = -ECONNRESET; 2674 goto unlock; 2675 } 2676 last = skb = skb_peek(&sk->sk_receive_queue); 2677 last_len = last ? last->len : 0; 2678 2679 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2680 if (skb) { 2681 skb = manage_oob(skb, sk, flags, copied); 2682 if (!skb) { 2683 unix_state_unlock(sk); 2684 if (copied) 2685 break; 2686 goto redo; 2687 } 2688 } 2689 #endif 2690 again: 2691 if (skb == NULL) { 2692 if (copied >= target) 2693 goto unlock; 2694 2695 /* 2696 * POSIX 1003.1g mandates this order. 2697 */ 2698 2699 err = sock_error(sk); 2700 if (err) 2701 goto unlock; 2702 if (sk->sk_shutdown & RCV_SHUTDOWN) 2703 goto unlock; 2704 2705 unix_state_unlock(sk); 2706 if (!timeo) { 2707 err = -EAGAIN; 2708 break; 2709 } 2710 2711 mutex_unlock(&u->iolock); 2712 2713 timeo = unix_stream_data_wait(sk, timeo, last, 2714 last_len, freezable); 2715 2716 if (signal_pending(current)) { 2717 err = sock_intr_errno(timeo); 2718 scm_destroy(&scm); 2719 goto out; 2720 } 2721 2722 mutex_lock(&u->iolock); 2723 goto redo; 2724 unlock: 2725 unix_state_unlock(sk); 2726 break; 2727 } 2728 2729 while (skip >= unix_skb_len(skb)) { 2730 skip -= unix_skb_len(skb); 2731 last = skb; 2732 last_len = skb->len; 2733 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2734 if (!skb) 2735 goto again; 2736 } 2737 2738 unix_state_unlock(sk); 2739 2740 if (check_creds) { 2741 /* Never glue messages from different writers */ 2742 if (!unix_skb_scm_eq(skb, &scm)) 2743 break; 2744 } else if (test_bit(SOCK_PASSCRED, &sock->flags) || 2745 test_bit(SOCK_PASSPIDFD, &sock->flags)) { 2746 /* Copy credentials */ 2747 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2748 unix_set_secdata(&scm, skb); 2749 check_creds = true; 2750 } 2751 2752 /* Copy address just once */ 2753 if (state->msg && state->msg->msg_name) { 2754 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2755 state->msg->msg_name); 2756 unix_copy_addr(state->msg, skb->sk); 2757 sunaddr = NULL; 2758 } 2759 2760 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2761 skb_get(skb); 2762 chunk = state->recv_actor(skb, skip, chunk, state); 2763 drop_skb = !unix_skb_len(skb); 2764 /* skb is only safe to use if !drop_skb */ 2765 consume_skb(skb); 2766 if (chunk < 0) { 2767 if (copied == 0) 2768 copied = -EFAULT; 2769 break; 2770 } 2771 copied += chunk; 2772 size -= chunk; 2773 2774 if (drop_skb) { 2775 /* the skb was touched by a concurrent reader; 2776 * we should not expect anything from this skb 2777 * anymore and assume it invalid 
- we can be 2778 * sure it was dropped from the socket queue 2779 * 2780 * let's report a short read 2781 */ 2782 err = 0; 2783 break; 2784 } 2785 2786 /* Mark read part of skb as used */ 2787 if (!(flags & MSG_PEEK)) { 2788 UNIXCB(skb).consumed += chunk; 2789 2790 sk_peek_offset_bwd(sk, chunk); 2791 2792 if (UNIXCB(skb).fp) { 2793 scm_stat_del(sk, skb); 2794 unix_detach_fds(&scm, skb); 2795 } 2796 2797 if (unix_skb_len(skb)) 2798 break; 2799 2800 skb_unlink(skb, &sk->sk_receive_queue); 2801 consume_skb(skb); 2802 2803 if (scm.fp) 2804 break; 2805 } else { 2806 /* It is questionable, see note in unix_dgram_recvmsg. 2807 */ 2808 if (UNIXCB(skb).fp) 2809 unix_peek_fds(&scm, skb); 2810 2811 sk_peek_offset_fwd(sk, chunk); 2812 2813 if (UNIXCB(skb).fp) 2814 break; 2815 2816 skip = 0; 2817 last = skb; 2818 last_len = skb->len; 2819 unix_state_lock(sk); 2820 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2821 if (skb) 2822 goto again; 2823 unix_state_unlock(sk); 2824 break; 2825 } 2826 } while (size); 2827 2828 mutex_unlock(&u->iolock); 2829 if (state->msg) 2830 scm_recv(sock, state->msg, &scm, flags); 2831 else 2832 scm_destroy(&scm); 2833 out: 2834 return copied ? : err; 2835 } 2836 2837 static int unix_stream_read_actor(struct sk_buff *skb, 2838 int skip, int chunk, 2839 struct unix_stream_read_state *state) 2840 { 2841 int ret; 2842 2843 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2844 state->msg, chunk); 2845 return ret ?: chunk; 2846 } 2847 2848 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2849 size_t size, int flags) 2850 { 2851 struct unix_stream_read_state state = { 2852 .recv_actor = unix_stream_read_actor, 2853 .socket = sk->sk_socket, 2854 .msg = msg, 2855 .size = size, 2856 .flags = flags 2857 }; 2858 2859 return unix_stream_read_generic(&state, true); 2860 } 2861 2862 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2863 size_t size, int flags) 2864 { 2865 struct unix_stream_read_state state = { 2866 .recv_actor = unix_stream_read_actor, 2867 .socket = sock, 2868 .msg = msg, 2869 .size = size, 2870 .flags = flags 2871 }; 2872 2873 #ifdef CONFIG_BPF_SYSCALL 2874 struct sock *sk = sock->sk; 2875 const struct proto *prot = READ_ONCE(sk->sk_prot); 2876 2877 if (prot != &unix_stream_proto) 2878 return prot->recvmsg(sk, msg, size, flags, NULL); 2879 #endif 2880 return unix_stream_read_generic(&state, true); 2881 } 2882 2883 static int unix_stream_splice_actor(struct sk_buff *skb, 2884 int skip, int chunk, 2885 struct unix_stream_read_state *state) 2886 { 2887 return skb_splice_bits(skb, state->socket->sk, 2888 UNIXCB(skb).consumed + skip, 2889 state->pipe, chunk, state->splice_flags); 2890 } 2891 2892 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2893 struct pipe_inode_info *pipe, 2894 size_t size, unsigned int flags) 2895 { 2896 struct unix_stream_read_state state = { 2897 .recv_actor = unix_stream_splice_actor, 2898 .socket = sock, 2899 .pipe = pipe, 2900 .size = size, 2901 .splice_flags = flags, 2902 }; 2903 2904 if (unlikely(*ppos)) 2905 return -ESPIPE; 2906 2907 if (sock->file->f_flags & O_NONBLOCK || 2908 flags & SPLICE_F_NONBLOCK) 2909 state.flags = MSG_DONTWAIT; 2910 2911 return unix_stream_read_generic(&state, false); 2912 } 2913 2914 static int unix_shutdown(struct socket *sock, int mode) 2915 { 2916 struct sock *sk = sock->sk; 2917 struct sock *other; 2918 2919 if (mode < SHUT_RD || mode > SHUT_RDWR) 2920 return -EINVAL; 2921 /* This maps: 2922 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2923 * 
SHUT_WR (1) -> SEND_SHUTDOWN (2) 2924 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2925 */ 2926 ++mode; 2927 2928 unix_state_lock(sk); 2929 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); 2930 other = unix_peer(sk); 2931 if (other) 2932 sock_hold(other); 2933 unix_state_unlock(sk); 2934 sk->sk_state_change(sk); 2935 2936 if (other && 2937 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2938 2939 int peer_mode = 0; 2940 const struct proto *prot = READ_ONCE(other->sk_prot); 2941 2942 if (prot->unhash) 2943 prot->unhash(other); 2944 if (mode&RCV_SHUTDOWN) 2945 peer_mode |= SEND_SHUTDOWN; 2946 if (mode&SEND_SHUTDOWN) 2947 peer_mode |= RCV_SHUTDOWN; 2948 unix_state_lock(other); 2949 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); 2950 unix_state_unlock(other); 2951 other->sk_state_change(other); 2952 if (peer_mode == SHUTDOWN_MASK) 2953 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2954 else if (peer_mode & RCV_SHUTDOWN) 2955 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2956 } 2957 if (other) 2958 sock_put(other); 2959 2960 return 0; 2961 } 2962 2963 long unix_inq_len(struct sock *sk) 2964 { 2965 struct sk_buff *skb; 2966 long amount = 0; 2967 2968 if (sk->sk_state == TCP_LISTEN) 2969 return -EINVAL; 2970 2971 spin_lock(&sk->sk_receive_queue.lock); 2972 if (sk->sk_type == SOCK_STREAM || 2973 sk->sk_type == SOCK_SEQPACKET) { 2974 skb_queue_walk(&sk->sk_receive_queue, skb) 2975 amount += unix_skb_len(skb); 2976 } else { 2977 skb = skb_peek(&sk->sk_receive_queue); 2978 if (skb) 2979 amount = skb->len; 2980 } 2981 spin_unlock(&sk->sk_receive_queue.lock); 2982 2983 return amount; 2984 } 2985 EXPORT_SYMBOL_GPL(unix_inq_len); 2986 2987 long unix_outq_len(struct sock *sk) 2988 { 2989 return sk_wmem_alloc_get(sk); 2990 } 2991 EXPORT_SYMBOL_GPL(unix_outq_len); 2992 2993 static int unix_open_file(struct sock *sk) 2994 { 2995 struct path path; 2996 struct file *f; 2997 int fd; 2998 2999 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3000 return -EPERM; 3001 3002 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3003 return -ENOENT; 3004 3005 path = unix_sk(sk)->path; 3006 if (!path.dentry) 3007 return -ENOENT; 3008 3009 path_get(&path); 3010 3011 fd = get_unused_fd_flags(O_CLOEXEC); 3012 if (fd < 0) 3013 goto out; 3014 3015 f = dentry_open(&path, O_PATH, current_cred()); 3016 if (IS_ERR(f)) { 3017 put_unused_fd(fd); 3018 fd = PTR_ERR(f); 3019 goto out; 3020 } 3021 3022 fd_install(fd, f); 3023 out: 3024 path_put(&path); 3025 3026 return fd; 3027 } 3028 3029 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3030 { 3031 struct sock *sk = sock->sk; 3032 long amount = 0; 3033 int err; 3034 3035 switch (cmd) { 3036 case SIOCOUTQ: 3037 amount = unix_outq_len(sk); 3038 err = put_user(amount, (int __user *)arg); 3039 break; 3040 case SIOCINQ: 3041 amount = unix_inq_len(sk); 3042 if (amount < 0) 3043 err = amount; 3044 else 3045 err = put_user(amount, (int __user *)arg); 3046 break; 3047 case SIOCUNIXFILE: 3048 err = unix_open_file(sk); 3049 break; 3050 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3051 case SIOCATMARK: 3052 { 3053 struct sk_buff *skb; 3054 int answ = 0; 3055 3056 skb = skb_peek(&sk->sk_receive_queue); 3057 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3058 answ = 1; 3059 err = put_user(answ, (int __user *)arg); 3060 } 3061 break; 3062 #endif 3063 default: 3064 err = -ENOIOCTLCMD; 3065 break; 3066 } 3067 return err; 3068 } 3069 3070 #ifdef CONFIG_COMPAT 3071 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long 
arg) 3072 { 3073 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3074 } 3075 #endif 3076 3077 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3078 { 3079 struct sock *sk = sock->sk; 3080 __poll_t mask; 3081 u8 shutdown; 3082 3083 sock_poll_wait(file, sock, wait); 3084 mask = 0; 3085 shutdown = READ_ONCE(sk->sk_shutdown); 3086 3087 /* exceptional events? */ 3088 if (READ_ONCE(sk->sk_err)) 3089 mask |= EPOLLERR; 3090 if (shutdown == SHUTDOWN_MASK) 3091 mask |= EPOLLHUP; 3092 if (shutdown & RCV_SHUTDOWN) 3093 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3094 3095 /* readable? */ 3096 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3097 mask |= EPOLLIN | EPOLLRDNORM; 3098 if (sk_is_readable(sk)) 3099 mask |= EPOLLIN | EPOLLRDNORM; 3100 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3101 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3102 mask |= EPOLLPRI; 3103 #endif 3104 3105 /* Connection-based need to check for termination and startup */ 3106 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3107 sk->sk_state == TCP_CLOSE) 3108 mask |= EPOLLHUP; 3109 3110 /* 3111 * we set writable also when the other side has shut down the 3112 * connection. This prevents stuck sockets. 3113 */ 3114 if (unix_writable(sk)) 3115 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3116 3117 return mask; 3118 } 3119 3120 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3121 poll_table *wait) 3122 { 3123 struct sock *sk = sock->sk, *other; 3124 unsigned int writable; 3125 __poll_t mask; 3126 u8 shutdown; 3127 3128 sock_poll_wait(file, sock, wait); 3129 mask = 0; 3130 shutdown = READ_ONCE(sk->sk_shutdown); 3131 3132 /* exceptional events? */ 3133 if (READ_ONCE(sk->sk_err) || 3134 !skb_queue_empty_lockless(&sk->sk_error_queue)) 3135 mask |= EPOLLERR | 3136 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3137 3138 if (shutdown & RCV_SHUTDOWN) 3139 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3140 if (shutdown == SHUTDOWN_MASK) 3141 mask |= EPOLLHUP; 3142 3143 /* readable? */ 3144 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3145 mask |= EPOLLIN | EPOLLRDNORM; 3146 if (sk_is_readable(sk)) 3147 mask |= EPOLLIN | EPOLLRDNORM; 3148 3149 /* Connection-based need to check for termination and startup */ 3150 if (sk->sk_type == SOCK_SEQPACKET) { 3151 if (sk->sk_state == TCP_CLOSE) 3152 mask |= EPOLLHUP; 3153 /* connection hasn't started yet? */ 3154 if (sk->sk_state == TCP_SYN_SENT) 3155 return mask; 3156 } 3157 3158 /* No write status requested, avoid expensive OUT tests. 
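* e.g. a poll()/select() caller interested only in readability skips the
* peer receive-queue check and the peer wake-queue registration that the
* writable path below has to do under the state lock.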
*/ 3159 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3160 return mask; 3161 3162 writable = unix_writable(sk); 3163 if (writable) { 3164 unix_state_lock(sk); 3165 3166 other = unix_peer(sk); 3167 if (other && unix_peer(other) != sk && 3168 unix_recvq_full_lockless(other) && 3169 unix_dgram_peer_wake_me(sk, other)) 3170 writable = 0; 3171 3172 unix_state_unlock(sk); 3173 } 3174 3175 if (writable) 3176 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3177 else 3178 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3179 3180 return mask; 3181 } 3182 3183 #ifdef CONFIG_PROC_FS 3184 3185 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3186 3187 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3188 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3189 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3190 3191 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3192 { 3193 unsigned long offset = get_offset(*pos); 3194 unsigned long bucket = get_bucket(*pos); 3195 unsigned long count = 0; 3196 struct sock *sk; 3197 3198 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3199 sk; sk = sk_next(sk)) { 3200 if (++count == offset) 3201 break; 3202 } 3203 3204 return sk; 3205 } 3206 3207 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3208 { 3209 unsigned long bucket = get_bucket(*pos); 3210 struct net *net = seq_file_net(seq); 3211 struct sock *sk; 3212 3213 while (bucket < UNIX_HASH_SIZE) { 3214 spin_lock(&net->unx.table.locks[bucket]); 3215 3216 sk = unix_from_bucket(seq, pos); 3217 if (sk) 3218 return sk; 3219 3220 spin_unlock(&net->unx.table.locks[bucket]); 3221 3222 *pos = set_bucket_offset(++bucket, 1); 3223 } 3224 3225 return NULL; 3226 } 3227 3228 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3229 loff_t *pos) 3230 { 3231 unsigned long bucket = get_bucket(*pos); 3232 3233 sk = sk_next(sk); 3234 if (sk) 3235 return sk; 3236 3237 3238 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3239 3240 *pos = set_bucket_offset(++bucket, 1); 3241 3242 return unix_get_first(seq, pos); 3243 } 3244 3245 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3246 { 3247 if (!*pos) 3248 return SEQ_START_TOKEN; 3249 3250 return unix_get_first(seq, pos); 3251 } 3252 3253 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3254 { 3255 ++*pos; 3256 3257 if (v == SEQ_START_TOKEN) 3258 return unix_get_first(seq, pos); 3259 3260 return unix_get_next(seq, v, pos); 3261 } 3262 3263 static void unix_seq_stop(struct seq_file *seq, void *v) 3264 { 3265 struct sock *sk = v; 3266 3267 if (sk) 3268 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3269 } 3270 3271 static int unix_seq_show(struct seq_file *seq, void *v) 3272 { 3273 3274 if (v == SEQ_START_TOKEN) 3275 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3276 "Inode Path\n"); 3277 else { 3278 struct sock *s = v; 3279 struct unix_sock *u = unix_sk(s); 3280 unix_state_lock(s); 3281 3282 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3283 s, 3284 refcount_read(&s->sk_refcnt), 3285 0, 3286 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3287 s->sk_type, 3288 s->sk_socket ? 3289 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3290 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3291 sock_i_ino(s)); 3292 3293 if (u->addr) { // under a hash table lock here 3294 int i, len; 3295 seq_putc(seq, ' '); 3296 3297 i = 0; 3298 len = u->addr->len - 3299 offsetof(struct sockaddr_un, sun_path); 3300 if (u->addr->name->sun_path[0]) { 3301 len--; 3302 } else { 3303 seq_putc(seq, '@'); 3304 i++; 3305 } 3306 for ( ; i < len; i++) 3307 seq_putc(seq, u->addr->name->sun_path[i] ?: 3308 '@'); 3309 } 3310 unix_state_unlock(s); 3311 seq_putc(seq, '\n'); 3312 } 3313 3314 return 0; 3315 } 3316 3317 static const struct seq_operations unix_seq_ops = { 3318 .start = unix_seq_start, 3319 .next = unix_seq_next, 3320 .stop = unix_seq_stop, 3321 .show = unix_seq_show, 3322 }; 3323 3324 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3325 struct bpf_unix_iter_state { 3326 struct seq_net_private p; 3327 unsigned int cur_sk; 3328 unsigned int end_sk; 3329 unsigned int max_sk; 3330 struct sock **batch; 3331 bool st_bucket_done; 3332 }; 3333 3334 struct bpf_iter__unix { 3335 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3336 __bpf_md_ptr(struct unix_sock *, unix_sk); 3337 uid_t uid __aligned(8); 3338 }; 3339 3340 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3341 struct unix_sock *unix_sk, uid_t uid) 3342 { 3343 struct bpf_iter__unix ctx; 3344 3345 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3346 ctx.meta = meta; 3347 ctx.unix_sk = unix_sk; 3348 ctx.uid = uid; 3349 return bpf_iter_run_prog(prog, &ctx); 3350 } 3351 3352 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3353 3354 { 3355 struct bpf_unix_iter_state *iter = seq->private; 3356 unsigned int expected = 1; 3357 struct sock *sk; 3358 3359 sock_hold(start_sk); 3360 iter->batch[iter->end_sk++] = start_sk; 3361 3362 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3363 if (iter->end_sk < iter->max_sk) { 3364 sock_hold(sk); 3365 iter->batch[iter->end_sk++] = sk; 3366 } 3367 3368 expected++; 3369 } 3370 3371 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3372 3373 return expected; 3374 } 3375 3376 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3377 { 3378 while (iter->cur_sk < iter->end_sk) 3379 sock_put(iter->batch[iter->cur_sk++]); 3380 } 3381 3382 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3383 unsigned int new_batch_sz) 3384 { 3385 struct sock **new_batch; 3386 3387 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3388 GFP_USER | __GFP_NOWARN); 3389 if (!new_batch) 3390 return -ENOMEM; 3391 3392 bpf_iter_unix_put_batch(iter); 3393 kvfree(iter->batch); 3394 iter->batch = new_batch; 3395 iter->max_sk = new_batch_sz; 3396 3397 return 0; 3398 } 3399 3400 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3401 loff_t *pos) 3402 { 3403 struct bpf_unix_iter_state *iter = seq->private; 3404 unsigned int expected; 3405 bool resized = false; 3406 struct sock *sk; 3407 3408 if (iter->st_bucket_done) 3409 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3410 3411 again: 3412 /* Get a new batch */ 3413 iter->cur_sk = 0; 3414 iter->end_sk = 0; 3415 3416 sk = unix_get_first(seq, pos); 3417 if (!sk) 3418 return NULL; /* Done */ 3419 3420 expected = bpf_iter_unix_hold_batch(seq, sk); 3421 3422 if (iter->end_sk == expected) { 3423 iter->st_bucket_done = true; 3424 return sk; 3425 } 3426 3427 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3428 resized = true; 3429 goto again; 3430 } 3431 3432 return sk; 3433 } 3434 3435 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3436 { 3437 if (!*pos) 3438 return SEQ_START_TOKEN; 3439 3440 /* bpf iter does not support lseek, so it always 3441 * continue from where it was stop()-ped. 3442 */ 3443 return bpf_iter_unix_batch(seq, pos); 3444 } 3445 3446 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3447 { 3448 struct bpf_unix_iter_state *iter = seq->private; 3449 struct sock *sk; 3450 3451 /* Whenever seq_next() is called, the iter->cur_sk is 3452 * done with seq_show(), so advance to the next sk in 3453 * the batch. 3454 */ 3455 if (iter->cur_sk < iter->end_sk) 3456 sock_put(iter->batch[iter->cur_sk++]); 3457 3458 ++*pos; 3459 3460 if (iter->cur_sk < iter->end_sk) 3461 sk = iter->batch[iter->cur_sk]; 3462 else 3463 sk = bpf_iter_unix_batch(seq, pos); 3464 3465 return sk; 3466 } 3467 3468 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3469 { 3470 struct bpf_iter_meta meta; 3471 struct bpf_prog *prog; 3472 struct sock *sk = v; 3473 uid_t uid; 3474 bool slow; 3475 int ret; 3476 3477 if (v == SEQ_START_TOKEN) 3478 return 0; 3479 3480 slow = lock_sock_fast(sk); 3481 3482 if (unlikely(sk_unhashed(sk))) { 3483 ret = SEQ_SKIP; 3484 goto unlock; 3485 } 3486 3487 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3488 meta.seq = seq; 3489 prog = bpf_iter_get_info(&meta, false); 3490 ret = unix_prog_seq_show(prog, &meta, v, uid); 3491 unlock: 3492 unlock_sock_fast(sk, slow); 3493 return ret; 3494 } 3495 3496 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3497 { 3498 struct bpf_unix_iter_state *iter = seq->private; 3499 struct bpf_iter_meta meta; 3500 struct bpf_prog *prog; 3501 3502 if (!v) { 3503 meta.seq = seq; 3504 prog = bpf_iter_get_info(&meta, true); 3505 if (prog) 3506 (void)unix_prog_seq_show(prog, &meta, v, 0); 3507 } 3508 3509 if (iter->cur_sk < iter->end_sk) 3510 bpf_iter_unix_put_batch(iter); 3511 } 3512 3513 static const struct seq_operations bpf_iter_unix_seq_ops = { 3514 .start = bpf_iter_unix_seq_start, 3515 .next = bpf_iter_unix_seq_next, 3516 .stop = bpf_iter_unix_seq_stop, 3517 .show = bpf_iter_unix_seq_show, 3518 }; 3519 #endif 3520 #endif 3521 3522 static const struct net_proto_family unix_family_ops = { 3523 .family = PF_UNIX, 3524 .create = unix_create, 3525 .owner = THIS_MODULE, 3526 }; 3527 3528 3529 static int __net_init unix_net_init(struct net *net) 3530 { 3531 int i; 3532 3533 net->unx.sysctl_max_dgram_qlen = 10; 3534 if (unix_sysctl_register(net)) 3535 goto out; 3536 3537 #ifdef CONFIG_PROC_FS 3538 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3539 sizeof(struct seq_net_private))) 3540 goto err_sysctl; 3541 #endif 3542 3543 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3544 sizeof(spinlock_t), GFP_KERNEL); 3545 if (!net->unx.table.locks) 3546 goto err_proc; 3547 3548 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3549 sizeof(struct hlist_head), 3550 GFP_KERNEL); 3551 if (!net->unx.table.buckets) 3552 goto free_locks; 3553 3554 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3555 spin_lock_init(&net->unx.table.locks[i]); 3556 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3557 } 3558 3559 return 0; 3560 3561 free_locks: 3562 kvfree(net->unx.table.locks); 3563 err_proc: 3564 #ifdef CONFIG_PROC_FS 3565 remove_proc_entry("unix", net->proc_net); 3566 err_sysctl: 3567 #endif 3568 unix_sysctl_unregister(net); 3569 out: 3570 return -ENOMEM; 3571 } 3572 3573 static void __net_exit unix_net_exit(struct net *net) 3574 { 3575 
kvfree(net->unx.table.buckets); 3576 kvfree(net->unx.table.locks); 3577 unix_sysctl_unregister(net); 3578 remove_proc_entry("unix", net->proc_net); 3579 } 3580 3581 static struct pernet_operations unix_net_ops = { 3582 .init = unix_net_init, 3583 .exit = unix_net_exit, 3584 }; 3585 3586 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3587 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3588 struct unix_sock *unix_sk, uid_t uid) 3589 3590 #define INIT_BATCH_SZ 16 3591 3592 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3593 { 3594 struct bpf_unix_iter_state *iter = priv_data; 3595 int err; 3596 3597 err = bpf_iter_init_seq_net(priv_data, aux); 3598 if (err) 3599 return err; 3600 3601 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3602 if (err) { 3603 bpf_iter_fini_seq_net(priv_data); 3604 return err; 3605 } 3606 3607 return 0; 3608 } 3609 3610 static void bpf_iter_fini_unix(void *priv_data) 3611 { 3612 struct bpf_unix_iter_state *iter = priv_data; 3613 3614 bpf_iter_fini_seq_net(priv_data); 3615 kvfree(iter->batch); 3616 } 3617 3618 static const struct bpf_iter_seq_info unix_seq_info = { 3619 .seq_ops = &bpf_iter_unix_seq_ops, 3620 .init_seq_private = bpf_iter_init_unix, 3621 .fini_seq_private = bpf_iter_fini_unix, 3622 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3623 }; 3624 3625 static const struct bpf_func_proto * 3626 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3627 const struct bpf_prog *prog) 3628 { 3629 switch (func_id) { 3630 case BPF_FUNC_setsockopt: 3631 return &bpf_sk_setsockopt_proto; 3632 case BPF_FUNC_getsockopt: 3633 return &bpf_sk_getsockopt_proto; 3634 default: 3635 return NULL; 3636 } 3637 } 3638 3639 static struct bpf_iter_reg unix_reg_info = { 3640 .target = "unix", 3641 .ctx_arg_info_size = 1, 3642 .ctx_arg_info = { 3643 { offsetof(struct bpf_iter__unix, unix_sk), 3644 PTR_TO_BTF_ID_OR_NULL }, 3645 }, 3646 .get_func_proto = bpf_iter_unix_get_func_proto, 3647 .seq_info = &unix_seq_info, 3648 }; 3649 3650 static void __init bpf_iter_register(void) 3651 { 3652 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3653 if (bpf_iter_reg_target(&unix_reg_info)) 3654 pr_warn("Warning: could not register bpf iterator unix\n"); 3655 } 3656 #endif 3657 3658 static int __init af_unix_init(void) 3659 { 3660 int i, rc = -1; 3661 3662 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3663 3664 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3665 spin_lock_init(&bsd_socket_locks[i]); 3666 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3667 } 3668 3669 rc = proto_register(&unix_dgram_proto, 1); 3670 if (rc != 0) { 3671 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3672 goto out; 3673 } 3674 3675 rc = proto_register(&unix_stream_proto, 1); 3676 if (rc != 0) { 3677 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3678 proto_unregister(&unix_dgram_proto); 3679 goto out; 3680 } 3681 3682 sock_register(&unix_family_ops); 3683 register_pernet_subsys(&unix_net_ops); 3684 unix_bpf_build_proto(); 3685 3686 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3687 bpf_iter_register(); 3688 #endif 3689 3690 out: 3691 return rc; 3692 } 3693 3694 static void __exit af_unix_exit(void) 3695 { 3696 sock_unregister(PF_UNIX); 3697 proto_unregister(&unix_dgram_proto); 3698 proto_unregister(&unix_stream_proto); 3699 unregister_pernet_subsys(&unix_net_ops); 3700 } 3701 3702 /* Earlier than 
device_initcall() so that other drivers invoking 3703 request_module() don't end up in a loop when modprobe tries 3704 to use a UNIX socket. But later than subsys_initcall() because 3705 we depend on stuff initialised there */ 3706 fs_initcall(af_unix_init); 3707 module_exit(af_unix_exit); 3708 3709 MODULE_LICENSE("GPL"); 3710 MODULE_ALIAS_NETPROTO(PF_UNIX); 3711