1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NET4: Implementation of BSD Unix domain sockets. 4 * 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> 6 * 7 * Fixes: 8 * Linus Torvalds : Assorted bug cures. 9 * Niibe Yutaka : async I/O support. 10 * Carsten Paeth : PF_UNIX check, address fixes. 11 * Alan Cox : Limit size of allocated blocks. 12 * Alan Cox : Fixed the stupid socketpair bug. 13 * Alan Cox : BSD compatibility fine tuning. 14 * Alan Cox : Fixed a bug in connect when interrupted. 15 * Alan Cox : Sorted out a proper draft version of 16 * file descriptor passing hacked up from 17 * Mike Shaver's work. 18 * Marty Leisner : Fixes to fd passing 19 * Nick Nevin : recvmsg bugfix. 20 * Alan Cox : Started proper garbage collector 21 * Heiko EiBfeldt : Missing verify_area check 22 * Alan Cox : Started POSIXisms 23 * Andreas Schwab : Replace inode by dentry for proper 24 * reference counting 25 * Kirk Petersen : Made this a module 26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 27 * Lots of bug fixes. 28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 29 * by above two patches. 30 * Andrea Arcangeli : If possible we block in connect(2) 31 * if the max backlog of the listen socket 32 * is been reached. This won't break 33 * old apps and it will avoid huge amount 34 * of socks hashed (this for unix_gc() 35 * performances reasons). 36 * Security fix that limits the max 37 * number of socks to 2*max_files and 38 * the number of skb queueable in the 39 * dgram receiver. 40 * Artur Skawina : Hash function optimizations 41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) 42 * Malcolm Beattie : Set peercred for socketpair 43 * Michal Ostrowski : Module initialization cleanup. 44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 45 * the core infrastructure is doing that 46 * for all net proto families now (2.5.69+) 47 * 48 * Known differences from reference BSD that was tested: 49 * 50 * [TO FIX] 51 * ECONNREFUSED is not returned from one end of a connected() socket to the 52 * other the moment one end closes. 53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 54 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 55 * [NOT TO FIX] 56 * accept() returns a path name even if the connecting socket has closed 57 * in the meantime (BSD loses the path and gives up). 58 * accept() returns 0 length path for an unbound connector. BSD returns 16 59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 60 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 61 * BSD af_unix apparently has connect forgetting to block properly. 62 * (need to check this with the POSIX spec in detail) 63 * 64 * Differences from 2.0.0-11-... (ANK) 65 * Bug fixes and improvements. 66 * - client shutdown killed server socket. 67 * - removed all useless cli/sti pairs. 68 * 69 * Semantic changes/extensions. 70 * - generic control message passing. 71 * - SCM_CREDENTIALS control message. 72 * - "Abstract" (not FS based) socket bindings. 73 * Abstract names are sequences of bytes (not zero terminated) 74 * started by 0, so that this name space does not intersect 75 * with BSD names. 
76 */ 77 78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 79 80 #include <linux/module.h> 81 #include <linux/kernel.h> 82 #include <linux/signal.h> 83 #include <linux/sched/signal.h> 84 #include <linux/errno.h> 85 #include <linux/string.h> 86 #include <linux/stat.h> 87 #include <linux/dcache.h> 88 #include <linux/namei.h> 89 #include <linux/socket.h> 90 #include <linux/un.h> 91 #include <linux/fcntl.h> 92 #include <linux/filter.h> 93 #include <linux/termios.h> 94 #include <linux/sockios.h> 95 #include <linux/net.h> 96 #include <linux/in.h> 97 #include <linux/fs.h> 98 #include <linux/slab.h> 99 #include <linux/uaccess.h> 100 #include <linux/skbuff.h> 101 #include <linux/netdevice.h> 102 #include <net/net_namespace.h> 103 #include <net/sock.h> 104 #include <net/tcp_states.h> 105 #include <net/af_unix.h> 106 #include <linux/proc_fs.h> 107 #include <linux/seq_file.h> 108 #include <net/scm.h> 109 #include <linux/init.h> 110 #include <linux/poll.h> 111 #include <linux/rtnetlink.h> 112 #include <linux/mount.h> 113 #include <net/checksum.h> 114 #include <linux/security.h> 115 #include <linux/splice.h> 116 #include <linux/freezer.h> 117 #include <linux/file.h> 118 #include <linux/btf_ids.h> 119 120 #include "scm.h" 121 122 static atomic_long_t unix_nr_socks; 123 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; 124 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; 125 126 /* SMP locking strategy: 127 * hash table is protected with spinlock. 128 * each socket state is protected by separate spinlock. 129 */ 130 131 static unsigned int unix_unbound_hash(struct sock *sk) 132 { 133 unsigned long hash = (unsigned long)sk; 134 135 hash ^= hash >> 16; 136 hash ^= hash >> 8; 137 hash ^= sk->sk_type; 138 139 return hash & UNIX_HASH_MOD; 140 } 141 142 static unsigned int unix_bsd_hash(struct inode *i) 143 { 144 return i->i_ino & UNIX_HASH_MOD; 145 } 146 147 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, 148 int addr_len, int type) 149 { 150 __wsum csum = csum_partial(sunaddr, addr_len, 0); 151 unsigned int hash; 152 153 hash = (__force unsigned int)csum_fold(csum); 154 hash ^= hash >> 8; 155 hash ^= type; 156 157 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); 158 } 159 160 static void unix_table_double_lock(struct net *net, 161 unsigned int hash1, unsigned int hash2) 162 { 163 if (hash1 == hash2) { 164 spin_lock(&net->unx.table.locks[hash1]); 165 return; 166 } 167 168 if (hash1 > hash2) 169 swap(hash1, hash2); 170 171 spin_lock(&net->unx.table.locks[hash1]); 172 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); 173 } 174 175 static void unix_table_double_unlock(struct net *net, 176 unsigned int hash1, unsigned int hash2) 177 { 178 if (hash1 == hash2) { 179 spin_unlock(&net->unx.table.locks[hash1]); 180 return; 181 } 182 183 spin_unlock(&net->unx.table.locks[hash1]); 184 spin_unlock(&net->unx.table.locks[hash2]); 185 } 186 187 #ifdef CONFIG_SECURITY_NETWORK 188 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 189 { 190 UNIXCB(skb).secid = scm->secid; 191 } 192 193 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 194 { 195 scm->secid = UNIXCB(skb).secid; 196 } 197 198 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 199 { 200 return (scm->secid == UNIXCB(skb).secid); 201 } 202 #else 203 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 204 { } 205 206 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 
207 { } 208 209 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 210 { 211 return true; 212 } 213 #endif /* CONFIG_SECURITY_NETWORK */ 214 215 #define unix_peer(sk) (unix_sk(sk)->peer) 216 217 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 218 { 219 return unix_peer(osk) == sk; 220 } 221 222 static inline int unix_may_send(struct sock *sk, struct sock *osk) 223 { 224 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 225 } 226 227 static inline int unix_recvq_full(const struct sock *sk) 228 { 229 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 230 } 231 232 static inline int unix_recvq_full_lockless(const struct sock *sk) 233 { 234 return skb_queue_len_lockless(&sk->sk_receive_queue) > 235 READ_ONCE(sk->sk_max_ack_backlog); 236 } 237 238 struct sock *unix_peer_get(struct sock *s) 239 { 240 struct sock *peer; 241 242 unix_state_lock(s); 243 peer = unix_peer(s); 244 if (peer) 245 sock_hold(peer); 246 unix_state_unlock(s); 247 return peer; 248 } 249 EXPORT_SYMBOL_GPL(unix_peer_get); 250 251 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 252 int addr_len) 253 { 254 struct unix_address *addr; 255 256 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 257 if (!addr) 258 return NULL; 259 260 refcount_set(&addr->refcnt, 1); 261 addr->len = addr_len; 262 memcpy(addr->name, sunaddr, addr_len); 263 264 return addr; 265 } 266 267 static inline void unix_release_addr(struct unix_address *addr) 268 { 269 if (refcount_dec_and_test(&addr->refcnt)) 270 kfree(addr); 271 } 272 273 /* 274 * Check unix socket name: 275 * - should be not zero length. 276 * - if started by not zero, should be NULL terminated (FS object) 277 * - if started by zero, it is abstract name. 278 */ 279 280 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 281 { 282 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 283 addr_len > sizeof(*sunaddr)) 284 return -EINVAL; 285 286 if (sunaddr->sun_family != AF_UNIX) 287 return -EINVAL; 288 289 return 0; 290 } 291 292 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 293 { 294 /* This may look like an off by one error but it is a bit more 295 * subtle. 108 is the longest valid AF_UNIX path for a binding. 296 * sun_path[108] doesn't as such exist. However in kernel space 297 * we are guaranteed that it is a valid memory location in our 298 * kernel address buffer because syscall functions always pass 299 * a pointer of struct sockaddr_storage which has a bigger buffer 300 * than 108. 
301 */ 302 ((char *)sunaddr)[addr_len] = 0; 303 } 304 305 static void __unix_remove_socket(struct sock *sk) 306 { 307 sk_del_node_init(sk); 308 } 309 310 static void __unix_insert_socket(struct net *net, struct sock *sk) 311 { 312 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 313 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 314 } 315 316 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 317 struct unix_address *addr, unsigned int hash) 318 { 319 __unix_remove_socket(sk); 320 smp_store_release(&unix_sk(sk)->addr, addr); 321 322 sk->sk_hash = hash; 323 __unix_insert_socket(net, sk); 324 } 325 326 static void unix_remove_socket(struct net *net, struct sock *sk) 327 { 328 spin_lock(&net->unx.table.locks[sk->sk_hash]); 329 __unix_remove_socket(sk); 330 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 331 } 332 333 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 334 { 335 spin_lock(&net->unx.table.locks[sk->sk_hash]); 336 __unix_insert_socket(net, sk); 337 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 338 } 339 340 static void unix_insert_bsd_socket(struct sock *sk) 341 { 342 spin_lock(&bsd_socket_locks[sk->sk_hash]); 343 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 344 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 345 } 346 347 static void unix_remove_bsd_socket(struct sock *sk) 348 { 349 if (!hlist_unhashed(&sk->sk_bind_node)) { 350 spin_lock(&bsd_socket_locks[sk->sk_hash]); 351 __sk_del_bind_node(sk); 352 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 353 354 sk_node_init(&sk->sk_bind_node); 355 } 356 } 357 358 static struct sock *__unix_find_socket_byname(struct net *net, 359 struct sockaddr_un *sunname, 360 int len, unsigned int hash) 361 { 362 struct sock *s; 363 364 sk_for_each(s, &net->unx.table.buckets[hash]) { 365 struct unix_sock *u = unix_sk(s); 366 367 if (u->addr->len == len && 368 !memcmp(u->addr->name, sunname, len)) 369 return s; 370 } 371 return NULL; 372 } 373 374 static inline struct sock *unix_find_socket_byname(struct net *net, 375 struct sockaddr_un *sunname, 376 int len, unsigned int hash) 377 { 378 struct sock *s; 379 380 spin_lock(&net->unx.table.locks[hash]); 381 s = __unix_find_socket_byname(net, sunname, len, hash); 382 if (s) 383 sock_hold(s); 384 spin_unlock(&net->unx.table.locks[hash]); 385 return s; 386 } 387 388 static struct sock *unix_find_socket_byinode(struct inode *i) 389 { 390 unsigned int hash = unix_bsd_hash(i); 391 struct sock *s; 392 393 spin_lock(&bsd_socket_locks[hash]); 394 sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 395 struct dentry *dentry = unix_sk(s)->path.dentry; 396 397 if (dentry && d_backing_inode(dentry) == i) { 398 sock_hold(s); 399 spin_unlock(&bsd_socket_locks[hash]); 400 return s; 401 } 402 } 403 spin_unlock(&bsd_socket_locks[hash]); 404 return NULL; 405 } 406 407 /* Support code for asymmetrically connected dgram sockets 408 * 409 * If a datagram socket is connected to a socket not itself connected 410 * to the first socket (eg, /dev/log), clients may only enqueue more 411 * messages if the present receive queue of the server socket is not 412 * "too large". This means there's a second writeability condition 413 * poll and sendmsg need to test. The dgram recv code will do a wake 414 * up on the peer_wait wait queue of a socket upon reception of a 415 * datagram which needs to be propagated to sleeping would-be writers 416 * since these might not have sent anything so far. 
This can't be 417 * accomplished via poll_wait because the lifetime of the server 418 * socket might be less than that of its clients if these break their 419 * association with it or if the server socket is closed while clients 420 * are still connected to it and there's no way to inform "a polling 421 * implementation" that it should let go of a certain wait queue 422 * 423 * In order to propagate a wake up, a wait_queue_entry_t of the client 424 * socket is enqueued on the peer_wait queue of the server socket 425 * whose wake function does a wake_up on the ordinary client socket 426 * wait queue. This connection is established whenever a write (or 427 * poll for write) hit the flow control condition and broken when the 428 * association to the server socket is dissolved or after a wake up 429 * was relayed. 430 */ 431 432 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 433 void *key) 434 { 435 struct unix_sock *u; 436 wait_queue_head_t *u_sleep; 437 438 u = container_of(q, struct unix_sock, peer_wake); 439 440 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 441 q); 442 u->peer_wake.private = NULL; 443 444 /* relaying can only happen while the wq still exists */ 445 u_sleep = sk_sleep(&u->sk); 446 if (u_sleep) 447 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 448 449 return 0; 450 } 451 452 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 453 { 454 struct unix_sock *u, *u_other; 455 int rc; 456 457 u = unix_sk(sk); 458 u_other = unix_sk(other); 459 rc = 0; 460 spin_lock(&u_other->peer_wait.lock); 461 462 if (!u->peer_wake.private) { 463 u->peer_wake.private = other; 464 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 465 466 rc = 1; 467 } 468 469 spin_unlock(&u_other->peer_wait.lock); 470 return rc; 471 } 472 473 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 474 struct sock *other) 475 { 476 struct unix_sock *u, *u_other; 477 478 u = unix_sk(sk); 479 u_other = unix_sk(other); 480 spin_lock(&u_other->peer_wait.lock); 481 482 if (u->peer_wake.private == other) { 483 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 484 u->peer_wake.private = NULL; 485 } 486 487 spin_unlock(&u_other->peer_wait.lock); 488 } 489 490 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 491 struct sock *other) 492 { 493 unix_dgram_peer_wake_disconnect(sk, other); 494 wake_up_interruptible_poll(sk_sleep(sk), 495 EPOLLOUT | 496 EPOLLWRNORM | 497 EPOLLWRBAND); 498 } 499 500 /* preconditions: 501 * - unix_peer(sk) == other 502 * - association is stable 503 */ 504 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 505 { 506 int connected; 507 508 connected = unix_dgram_peer_wake_connect(sk, other); 509 510 /* If other is SOCK_DEAD, we want to make sure we signal 511 * POLLOUT, such that a subsequent write() can get a 512 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 513 * to other and its full, we will hang waiting for POLLOUT. 
514 */ 515 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) 516 return 1; 517 518 if (connected) 519 unix_dgram_peer_wake_disconnect(sk, other); 520 521 return 0; 522 } 523 524 static int unix_writable(const struct sock *sk) 525 { 526 return sk->sk_state != TCP_LISTEN && 527 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 528 } 529 530 static void unix_write_space(struct sock *sk) 531 { 532 struct socket_wq *wq; 533 534 rcu_read_lock(); 535 if (unix_writable(sk)) { 536 wq = rcu_dereference(sk->sk_wq); 537 if (skwq_has_sleeper(wq)) 538 wake_up_interruptible_sync_poll(&wq->wait, 539 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 540 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 541 } 542 rcu_read_unlock(); 543 } 544 545 /* When dgram socket disconnects (or changes its peer), we clear its receive 546 * queue of packets arrived from previous peer. First, it allows to do 547 * flow control based only on wmem_alloc; second, sk connected to peer 548 * may receive messages only from that peer. */ 549 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 550 { 551 if (!skb_queue_empty(&sk->sk_receive_queue)) { 552 skb_queue_purge(&sk->sk_receive_queue); 553 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 554 555 /* If one link of bidirectional dgram pipe is disconnected, 556 * we signal error. Messages are lost. Do not make this, 557 * when peer was not connected to us. 558 */ 559 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 560 WRITE_ONCE(other->sk_err, ECONNRESET); 561 sk_error_report(other); 562 } 563 } 564 other->sk_state = TCP_CLOSE; 565 } 566 567 static void unix_sock_destructor(struct sock *sk) 568 { 569 struct unix_sock *u = unix_sk(sk); 570 571 skb_queue_purge(&sk->sk_receive_queue); 572 573 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); 574 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 575 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); 576 if (!sock_flag(sk, SOCK_DEAD)) { 577 pr_info("Attempt to release alive unix socket: %p\n", sk); 578 return; 579 } 580 581 if (u->addr) 582 unix_release_addr(u->addr); 583 584 atomic_long_dec(&unix_nr_socks); 585 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 586 #ifdef UNIX_REFCNT_DEBUG 587 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 588 atomic_long_read(&unix_nr_socks)); 589 #endif 590 } 591 592 static void unix_release_sock(struct sock *sk, int embrion) 593 { 594 struct unix_sock *u = unix_sk(sk); 595 struct sock *skpair; 596 struct sk_buff *skb; 597 struct path path; 598 int state; 599 600 unix_remove_socket(sock_net(sk), sk); 601 unix_remove_bsd_socket(sk); 602 603 /* Clear state */ 604 unix_state_lock(sk); 605 sock_orphan(sk); 606 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); 607 path = u->path; 608 u->path.dentry = NULL; 609 u->path.mnt = NULL; 610 state = sk->sk_state; 611 sk->sk_state = TCP_CLOSE; 612 613 skpair = unix_peer(sk); 614 unix_peer(sk) = NULL; 615 616 unix_state_unlock(sk); 617 618 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 619 if (u->oob_skb) { 620 kfree_skb(u->oob_skb); 621 u->oob_skb = NULL; 622 } 623 #endif 624 625 wake_up_interruptible_all(&u->peer_wait); 626 627 if (skpair != NULL) { 628 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 629 unix_state_lock(skpair); 630 /* No more writes */ 631 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); 632 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 633 WRITE_ONCE(skpair->sk_err, ECONNRESET); 634 unix_state_unlock(skpair); 635 skpair->sk_state_change(skpair); 636 sk_wake_async(skpair, 
SOCK_WAKE_WAITD, POLL_HUP); 637 } 638 639 unix_dgram_peer_wake_disconnect(sk, skpair); 640 sock_put(skpair); /* It may now die */ 641 } 642 643 /* Try to flush out this socket. Throw out buffers at least */ 644 645 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 646 if (state == TCP_LISTEN) 647 unix_release_sock(skb->sk, 1); 648 /* passed fds are erased in the kfree_skb hook */ 649 UNIXCB(skb).consumed = skb->len; 650 kfree_skb(skb); 651 } 652 653 if (path.dentry) 654 path_put(&path); 655 656 sock_put(sk); 657 658 /* ---- Socket is dead now and most probably destroyed ---- */ 659 660 /* 661 * Fixme: BSD difference: In BSD all sockets connected to us get 662 * ECONNRESET and we die on the spot. In Linux we behave 663 * like files and pipes do and wait for the last 664 * dereference. 665 * 666 * Can't we simply set sock->err? 667 * 668 * What the above comment does talk about? --ANK(980817) 669 */ 670 671 if (unix_tot_inflight) 672 unix_gc(); /* Garbage collect fds */ 673 } 674 675 static void init_peercred(struct sock *sk) 676 { 677 const struct cred *old_cred; 678 struct pid *old_pid; 679 680 spin_lock(&sk->sk_peer_lock); 681 old_pid = sk->sk_peer_pid; 682 old_cred = sk->sk_peer_cred; 683 sk->sk_peer_pid = get_pid(task_tgid(current)); 684 sk->sk_peer_cred = get_current_cred(); 685 spin_unlock(&sk->sk_peer_lock); 686 687 put_pid(old_pid); 688 put_cred(old_cred); 689 } 690 691 static void copy_peercred(struct sock *sk, struct sock *peersk) 692 { 693 const struct cred *old_cred; 694 struct pid *old_pid; 695 696 if (sk < peersk) { 697 spin_lock(&sk->sk_peer_lock); 698 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 699 } else { 700 spin_lock(&peersk->sk_peer_lock); 701 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 702 } 703 old_pid = sk->sk_peer_pid; 704 old_cred = sk->sk_peer_cred; 705 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 706 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 707 708 spin_unlock(&sk->sk_peer_lock); 709 spin_unlock(&peersk->sk_peer_lock); 710 711 put_pid(old_pid); 712 put_cred(old_cred); 713 } 714 715 static int unix_listen(struct socket *sock, int backlog) 716 { 717 int err; 718 struct sock *sk = sock->sk; 719 struct unix_sock *u = unix_sk(sk); 720 721 err = -EOPNOTSUPP; 722 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 723 goto out; /* Only stream/seqpacket sockets accept */ 724 err = -EINVAL; 725 if (!u->addr) 726 goto out; /* No listens on an unbound socket */ 727 unix_state_lock(sk); 728 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 729 goto out_unlock; 730 if (backlog > sk->sk_max_ack_backlog) 731 wake_up_interruptible_all(&u->peer_wait); 732 sk->sk_max_ack_backlog = backlog; 733 sk->sk_state = TCP_LISTEN; 734 /* set credentials so connect can copy them */ 735 init_peercred(sk); 736 err = 0; 737 738 out_unlock: 739 unix_state_unlock(sk); 740 out: 741 return err; 742 } 743 744 static int unix_release(struct socket *); 745 static int unix_bind(struct socket *, struct sockaddr *, int); 746 static int unix_stream_connect(struct socket *, struct sockaddr *, 747 int addr_len, int flags); 748 static int unix_socketpair(struct socket *, struct socket *); 749 static int unix_accept(struct socket *, struct socket *, int, bool); 750 static int unix_getname(struct socket *, struct sockaddr *, int); 751 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 752 static __poll_t unix_dgram_poll(struct file *, struct socket *, 753 poll_table *); 754 static int unix_ioctl(struct socket 
*, unsigned int, unsigned long); 755 #ifdef CONFIG_COMPAT 756 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 757 #endif 758 static int unix_shutdown(struct socket *, int); 759 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 760 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 761 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 762 struct pipe_inode_info *, size_t size, 763 unsigned int flags); 764 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 765 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 766 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 767 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 768 static int unix_dgram_connect(struct socket *, struct sockaddr *, 769 int, int); 770 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 771 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 772 int); 773 774 static int unix_set_peek_off(struct sock *sk, int val) 775 { 776 struct unix_sock *u = unix_sk(sk); 777 778 if (mutex_lock_interruptible(&u->iolock)) 779 return -EINTR; 780 781 sk->sk_peek_off = val; 782 mutex_unlock(&u->iolock); 783 784 return 0; 785 } 786 787 #ifdef CONFIG_PROC_FS 788 static int unix_count_nr_fds(struct sock *sk) 789 { 790 struct sk_buff *skb; 791 struct unix_sock *u; 792 int nr_fds = 0; 793 794 spin_lock(&sk->sk_receive_queue.lock); 795 skb = skb_peek(&sk->sk_receive_queue); 796 while (skb) { 797 u = unix_sk(skb->sk); 798 nr_fds += atomic_read(&u->scm_stat.nr_fds); 799 skb = skb_peek_next(skb, &sk->sk_receive_queue); 800 } 801 spin_unlock(&sk->sk_receive_queue.lock); 802 803 return nr_fds; 804 } 805 806 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 807 { 808 struct sock *sk = sock->sk; 809 unsigned char s_state; 810 struct unix_sock *u; 811 int nr_fds = 0; 812 813 if (sk) { 814 s_state = READ_ONCE(sk->sk_state); 815 u = unix_sk(sk); 816 817 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their 818 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN. 819 * SOCK_DGRAM is ordinary. So, no lock is needed. 
820 */ 821 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED) 822 nr_fds = atomic_read(&u->scm_stat.nr_fds); 823 else if (s_state == TCP_LISTEN) 824 nr_fds = unix_count_nr_fds(sk); 825 826 seq_printf(m, "scm_fds: %u\n", nr_fds); 827 } 828 } 829 #else 830 #define unix_show_fdinfo NULL 831 #endif 832 833 static const struct proto_ops unix_stream_ops = { 834 .family = PF_UNIX, 835 .owner = THIS_MODULE, 836 .release = unix_release, 837 .bind = unix_bind, 838 .connect = unix_stream_connect, 839 .socketpair = unix_socketpair, 840 .accept = unix_accept, 841 .getname = unix_getname, 842 .poll = unix_poll, 843 .ioctl = unix_ioctl, 844 #ifdef CONFIG_COMPAT 845 .compat_ioctl = unix_compat_ioctl, 846 #endif 847 .listen = unix_listen, 848 .shutdown = unix_shutdown, 849 .sendmsg = unix_stream_sendmsg, 850 .recvmsg = unix_stream_recvmsg, 851 .read_skb = unix_stream_read_skb, 852 .mmap = sock_no_mmap, 853 .splice_read = unix_stream_splice_read, 854 .set_peek_off = unix_set_peek_off, 855 .show_fdinfo = unix_show_fdinfo, 856 }; 857 858 static const struct proto_ops unix_dgram_ops = { 859 .family = PF_UNIX, 860 .owner = THIS_MODULE, 861 .release = unix_release, 862 .bind = unix_bind, 863 .connect = unix_dgram_connect, 864 .socketpair = unix_socketpair, 865 .accept = sock_no_accept, 866 .getname = unix_getname, 867 .poll = unix_dgram_poll, 868 .ioctl = unix_ioctl, 869 #ifdef CONFIG_COMPAT 870 .compat_ioctl = unix_compat_ioctl, 871 #endif 872 .listen = sock_no_listen, 873 .shutdown = unix_shutdown, 874 .sendmsg = unix_dgram_sendmsg, 875 .read_skb = unix_read_skb, 876 .recvmsg = unix_dgram_recvmsg, 877 .mmap = sock_no_mmap, 878 .set_peek_off = unix_set_peek_off, 879 .show_fdinfo = unix_show_fdinfo, 880 }; 881 882 static const struct proto_ops unix_seqpacket_ops = { 883 .family = PF_UNIX, 884 .owner = THIS_MODULE, 885 .release = unix_release, 886 .bind = unix_bind, 887 .connect = unix_stream_connect, 888 .socketpair = unix_socketpair, 889 .accept = unix_accept, 890 .getname = unix_getname, 891 .poll = unix_dgram_poll, 892 .ioctl = unix_ioctl, 893 #ifdef CONFIG_COMPAT 894 .compat_ioctl = unix_compat_ioctl, 895 #endif 896 .listen = unix_listen, 897 .shutdown = unix_shutdown, 898 .sendmsg = unix_seqpacket_sendmsg, 899 .recvmsg = unix_seqpacket_recvmsg, 900 .mmap = sock_no_mmap, 901 .set_peek_off = unix_set_peek_off, 902 .show_fdinfo = unix_show_fdinfo, 903 }; 904 905 static void unix_close(struct sock *sk, long timeout) 906 { 907 /* Nothing to do here, unix socket does not need a ->close(). 908 * This is merely for sockmap. 909 */ 910 } 911 912 static void unix_unhash(struct sock *sk) 913 { 914 /* Nothing to do here, unix socket does not need a ->unhash(). 915 * This is merely for sockmap. 
916 */ 917 } 918 919 static bool unix_bpf_bypass_getsockopt(int level, int optname) 920 { 921 if (level == SOL_SOCKET) { 922 switch (optname) { 923 case SO_PEERPIDFD: 924 return true; 925 default: 926 return false; 927 } 928 } 929 930 return false; 931 } 932 933 struct proto unix_dgram_proto = { 934 .name = "UNIX", 935 .owner = THIS_MODULE, 936 .obj_size = sizeof(struct unix_sock), 937 .close = unix_close, 938 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, 939 #ifdef CONFIG_BPF_SYSCALL 940 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 941 #endif 942 }; 943 944 struct proto unix_stream_proto = { 945 .name = "UNIX-STREAM", 946 .owner = THIS_MODULE, 947 .obj_size = sizeof(struct unix_sock), 948 .close = unix_close, 949 .unhash = unix_unhash, 950 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, 951 #ifdef CONFIG_BPF_SYSCALL 952 .psock_update_sk_prot = unix_stream_bpf_update_proto, 953 #endif 954 }; 955 956 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 957 { 958 struct unix_sock *u; 959 struct sock *sk; 960 int err; 961 962 atomic_long_inc(&unix_nr_socks); 963 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 964 err = -ENFILE; 965 goto err; 966 } 967 968 if (type == SOCK_STREAM) 969 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 970 else /*dgram and seqpacket */ 971 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 972 973 if (!sk) { 974 err = -ENOMEM; 975 goto err; 976 } 977 978 sock_init_data(sock, sk); 979 980 sk->sk_hash = unix_unbound_hash(sk); 981 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 982 sk->sk_write_space = unix_write_space; 983 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 984 sk->sk_destruct = unix_sock_destructor; 985 u = unix_sk(sk); 986 u->path.dentry = NULL; 987 u->path.mnt = NULL; 988 spin_lock_init(&u->lock); 989 atomic_long_set(&u->inflight, 0); 990 INIT_LIST_HEAD(&u->link); 991 mutex_init(&u->iolock); /* single task reading lock */ 992 mutex_init(&u->bindlock); /* single task binding lock */ 993 init_waitqueue_head(&u->peer_wait); 994 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 995 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 996 unix_insert_unbound_socket(net, sk); 997 998 sock_prot_inuse_add(net, sk->sk_prot, 1); 999 1000 return sk; 1001 1002 err: 1003 atomic_long_dec(&unix_nr_socks); 1004 return ERR_PTR(err); 1005 } 1006 1007 static int unix_create(struct net *net, struct socket *sock, int protocol, 1008 int kern) 1009 { 1010 struct sock *sk; 1011 1012 if (protocol && protocol != PF_UNIX) 1013 return -EPROTONOSUPPORT; 1014 1015 sock->state = SS_UNCONNECTED; 1016 1017 switch (sock->type) { 1018 case SOCK_STREAM: 1019 sock->ops = &unix_stream_ops; 1020 break; 1021 /* 1022 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 1023 * nothing uses it. 
1024 */ 1025 case SOCK_RAW: 1026 sock->type = SOCK_DGRAM; 1027 fallthrough; 1028 case SOCK_DGRAM: 1029 sock->ops = &unix_dgram_ops; 1030 break; 1031 case SOCK_SEQPACKET: 1032 sock->ops = &unix_seqpacket_ops; 1033 break; 1034 default: 1035 return -ESOCKTNOSUPPORT; 1036 } 1037 1038 sk = unix_create1(net, sock, kern, sock->type); 1039 if (IS_ERR(sk)) 1040 return PTR_ERR(sk); 1041 1042 return 0; 1043 } 1044 1045 static int unix_release(struct socket *sock) 1046 { 1047 struct sock *sk = sock->sk; 1048 1049 if (!sk) 1050 return 0; 1051 1052 sk->sk_prot->close(sk, 0); 1053 unix_release_sock(sk, 0); 1054 sock->sk = NULL; 1055 1056 return 0; 1057 } 1058 1059 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1060 int type) 1061 { 1062 struct inode *inode; 1063 struct path path; 1064 struct sock *sk; 1065 int err; 1066 1067 unix_mkname_bsd(sunaddr, addr_len); 1068 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1069 if (err) 1070 goto fail; 1071 1072 err = path_permission(&path, MAY_WRITE); 1073 if (err) 1074 goto path_put; 1075 1076 err = -ECONNREFUSED; 1077 inode = d_backing_inode(path.dentry); 1078 if (!S_ISSOCK(inode->i_mode)) 1079 goto path_put; 1080 1081 sk = unix_find_socket_byinode(inode); 1082 if (!sk) 1083 goto path_put; 1084 1085 err = -EPROTOTYPE; 1086 if (sk->sk_type == type) 1087 touch_atime(&path); 1088 else 1089 goto sock_put; 1090 1091 path_put(&path); 1092 1093 return sk; 1094 1095 sock_put: 1096 sock_put(sk); 1097 path_put: 1098 path_put(&path); 1099 fail: 1100 return ERR_PTR(err); 1101 } 1102 1103 static struct sock *unix_find_abstract(struct net *net, 1104 struct sockaddr_un *sunaddr, 1105 int addr_len, int type) 1106 { 1107 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1108 struct dentry *dentry; 1109 struct sock *sk; 1110 1111 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1112 if (!sk) 1113 return ERR_PTR(-ECONNREFUSED); 1114 1115 dentry = unix_sk(sk)->path.dentry; 1116 if (dentry) 1117 touch_atime(&unix_sk(sk)->path); 1118 1119 return sk; 1120 } 1121 1122 static struct sock *unix_find_other(struct net *net, 1123 struct sockaddr_un *sunaddr, 1124 int addr_len, int type) 1125 { 1126 struct sock *sk; 1127 1128 if (sunaddr->sun_path[0]) 1129 sk = unix_find_bsd(sunaddr, addr_len, type); 1130 else 1131 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1132 1133 return sk; 1134 } 1135 1136 static int unix_autobind(struct sock *sk) 1137 { 1138 unsigned int new_hash, old_hash = sk->sk_hash; 1139 struct unix_sock *u = unix_sk(sk); 1140 struct net *net = sock_net(sk); 1141 struct unix_address *addr; 1142 u32 lastnum, ordernum; 1143 int err; 1144 1145 err = mutex_lock_interruptible(&u->bindlock); 1146 if (err) 1147 return err; 1148 1149 if (u->addr) 1150 goto out; 1151 1152 err = -ENOMEM; 1153 addr = kzalloc(sizeof(*addr) + 1154 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1155 if (!addr) 1156 goto out; 1157 1158 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1159 addr->name->sun_family = AF_UNIX; 1160 refcount_set(&addr->refcnt, 1); 1161 1162 ordernum = get_random_u32(); 1163 lastnum = ordernum & 0xFFFFF; 1164 retry: 1165 ordernum = (ordernum + 1) & 0xFFFFF; 1166 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1167 1168 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1169 unix_table_double_lock(net, old_hash, new_hash); 1170 1171 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1172 unix_table_double_unlock(net, old_hash, new_hash); 1173 1174 /* 
__unix_find_socket_byname() may take long time if many names 1175 * are already in use. 1176 */ 1177 cond_resched(); 1178 1179 if (ordernum == lastnum) { 1180 /* Give up if all names seems to be in use. */ 1181 err = -ENOSPC; 1182 unix_release_addr(addr); 1183 goto out; 1184 } 1185 1186 goto retry; 1187 } 1188 1189 __unix_set_addr_hash(net, sk, addr, new_hash); 1190 unix_table_double_unlock(net, old_hash, new_hash); 1191 err = 0; 1192 1193 out: mutex_unlock(&u->bindlock); 1194 return err; 1195 } 1196 1197 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1198 int addr_len) 1199 { 1200 umode_t mode = S_IFSOCK | 1201 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1202 unsigned int new_hash, old_hash = sk->sk_hash; 1203 struct unix_sock *u = unix_sk(sk); 1204 struct net *net = sock_net(sk); 1205 struct mnt_idmap *idmap; 1206 struct unix_address *addr; 1207 struct dentry *dentry; 1208 struct path parent; 1209 int err; 1210 1211 unix_mkname_bsd(sunaddr, addr_len); 1212 addr_len = strlen(sunaddr->sun_path) + 1213 offsetof(struct sockaddr_un, sun_path) + 1; 1214 1215 addr = unix_create_addr(sunaddr, addr_len); 1216 if (!addr) 1217 return -ENOMEM; 1218 1219 /* 1220 * Get the parent directory, calculate the hash for last 1221 * component. 1222 */ 1223 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1224 if (IS_ERR(dentry)) { 1225 err = PTR_ERR(dentry); 1226 goto out; 1227 } 1228 1229 /* 1230 * All right, let's create it. 1231 */ 1232 idmap = mnt_idmap(parent.mnt); 1233 err = security_path_mknod(&parent, dentry, mode, 0); 1234 if (!err) 1235 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); 1236 if (err) 1237 goto out_path; 1238 err = mutex_lock_interruptible(&u->bindlock); 1239 if (err) 1240 goto out_unlink; 1241 if (u->addr) 1242 goto out_unlock; 1243 1244 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1245 unix_table_double_lock(net, old_hash, new_hash); 1246 u->path.mnt = mntget(parent.mnt); 1247 u->path.dentry = dget(dentry); 1248 __unix_set_addr_hash(net, sk, addr, new_hash); 1249 unix_table_double_unlock(net, old_hash, new_hash); 1250 unix_insert_bsd_socket(sk); 1251 mutex_unlock(&u->bindlock); 1252 done_path_create(&parent, dentry); 1253 return 0; 1254 1255 out_unlock: 1256 mutex_unlock(&u->bindlock); 1257 err = -EINVAL; 1258 out_unlink: 1259 /* failed after successful mknod? unlink what we'd created... */ 1260 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL); 1261 out_path: 1262 done_path_create(&parent, dentry); 1263 out: 1264 unix_release_addr(addr); 1265 return err == -EEXIST ? 
-EADDRINUSE : err; 1266 } 1267 1268 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1269 int addr_len) 1270 { 1271 unsigned int new_hash, old_hash = sk->sk_hash; 1272 struct unix_sock *u = unix_sk(sk); 1273 struct net *net = sock_net(sk); 1274 struct unix_address *addr; 1275 int err; 1276 1277 addr = unix_create_addr(sunaddr, addr_len); 1278 if (!addr) 1279 return -ENOMEM; 1280 1281 err = mutex_lock_interruptible(&u->bindlock); 1282 if (err) 1283 goto out; 1284 1285 if (u->addr) { 1286 err = -EINVAL; 1287 goto out_mutex; 1288 } 1289 1290 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1291 unix_table_double_lock(net, old_hash, new_hash); 1292 1293 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1294 goto out_spin; 1295 1296 __unix_set_addr_hash(net, sk, addr, new_hash); 1297 unix_table_double_unlock(net, old_hash, new_hash); 1298 mutex_unlock(&u->bindlock); 1299 return 0; 1300 1301 out_spin: 1302 unix_table_double_unlock(net, old_hash, new_hash); 1303 err = -EADDRINUSE; 1304 out_mutex: 1305 mutex_unlock(&u->bindlock); 1306 out: 1307 unix_release_addr(addr); 1308 return err; 1309 } 1310 1311 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1312 { 1313 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1314 struct sock *sk = sock->sk; 1315 int err; 1316 1317 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1318 sunaddr->sun_family == AF_UNIX) 1319 return unix_autobind(sk); 1320 1321 err = unix_validate_addr(sunaddr, addr_len); 1322 if (err) 1323 return err; 1324 1325 if (sunaddr->sun_path[0]) 1326 err = unix_bind_bsd(sk, sunaddr, addr_len); 1327 else 1328 err = unix_bind_abstract(sk, sunaddr, addr_len); 1329 1330 return err; 1331 } 1332 1333 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1334 { 1335 if (unlikely(sk1 == sk2) || !sk2) { 1336 unix_state_lock(sk1); 1337 return; 1338 } 1339 if (sk1 < sk2) { 1340 unix_state_lock(sk1); 1341 unix_state_lock_nested(sk2); 1342 } else { 1343 unix_state_lock(sk2); 1344 unix_state_lock_nested(sk1); 1345 } 1346 } 1347 1348 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1349 { 1350 if (unlikely(sk1 == sk2) || !sk2) { 1351 unix_state_unlock(sk1); 1352 return; 1353 } 1354 unix_state_unlock(sk1); 1355 unix_state_unlock(sk2); 1356 } 1357 1358 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1359 int alen, int flags) 1360 { 1361 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1362 struct sock *sk = sock->sk; 1363 struct sock *other; 1364 int err; 1365 1366 err = -EINVAL; 1367 if (alen < offsetofend(struct sockaddr, sa_family)) 1368 goto out; 1369 1370 if (addr->sa_family != AF_UNSPEC) { 1371 err = unix_validate_addr(sunaddr, alen); 1372 if (err) 1373 goto out; 1374 1375 if ((test_bit(SOCK_PASSCRED, &sock->flags) || 1376 test_bit(SOCK_PASSPIDFD, &sock->flags)) && 1377 !unix_sk(sk)->addr) { 1378 err = unix_autobind(sk); 1379 if (err) 1380 goto out; 1381 } 1382 1383 restart: 1384 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1385 if (IS_ERR(other)) { 1386 err = PTR_ERR(other); 1387 goto out; 1388 } 1389 1390 unix_state_double_lock(sk, other); 1391 1392 /* Apparently VFS overslept socket death. Retry. 
*/ 1393 if (sock_flag(other, SOCK_DEAD)) { 1394 unix_state_double_unlock(sk, other); 1395 sock_put(other); 1396 goto restart; 1397 } 1398 1399 err = -EPERM; 1400 if (!unix_may_send(sk, other)) 1401 goto out_unlock; 1402 1403 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1404 if (err) 1405 goto out_unlock; 1406 1407 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1408 } else { 1409 /* 1410 * 1003.1g breaking connected state with AF_UNSPEC 1411 */ 1412 other = NULL; 1413 unix_state_double_lock(sk, other); 1414 } 1415 1416 /* 1417 * If it was connected, reconnect. 1418 */ 1419 if (unix_peer(sk)) { 1420 struct sock *old_peer = unix_peer(sk); 1421 1422 unix_peer(sk) = other; 1423 if (!other) 1424 sk->sk_state = TCP_CLOSE; 1425 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1426 1427 unix_state_double_unlock(sk, other); 1428 1429 if (other != old_peer) 1430 unix_dgram_disconnected(sk, old_peer); 1431 sock_put(old_peer); 1432 } else { 1433 unix_peer(sk) = other; 1434 unix_state_double_unlock(sk, other); 1435 } 1436 1437 return 0; 1438 1439 out_unlock: 1440 unix_state_double_unlock(sk, other); 1441 sock_put(other); 1442 out: 1443 return err; 1444 } 1445 1446 static long unix_wait_for_peer(struct sock *other, long timeo) 1447 __releases(&unix_sk(other)->lock) 1448 { 1449 struct unix_sock *u = unix_sk(other); 1450 int sched; 1451 DEFINE_WAIT(wait); 1452 1453 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1454 1455 sched = !sock_flag(other, SOCK_DEAD) && 1456 !(other->sk_shutdown & RCV_SHUTDOWN) && 1457 unix_recvq_full_lockless(other); 1458 1459 unix_state_unlock(other); 1460 1461 if (sched) 1462 timeo = schedule_timeout(timeo); 1463 1464 finish_wait(&u->peer_wait, &wait); 1465 return timeo; 1466 } 1467 1468 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1469 int addr_len, int flags) 1470 { 1471 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1472 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1473 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1474 struct net *net = sock_net(sk); 1475 struct sk_buff *skb = NULL; 1476 long timeo; 1477 int err; 1478 int st; 1479 1480 err = unix_validate_addr(sunaddr, addr_len); 1481 if (err) 1482 goto out; 1483 1484 if ((test_bit(SOCK_PASSCRED, &sock->flags) || 1485 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { 1486 err = unix_autobind(sk); 1487 if (err) 1488 goto out; 1489 } 1490 1491 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1492 1493 /* First of all allocate resources. 1494 If we will make it after state is locked, 1495 we will have to recheck all again in any case. 1496 */ 1497 1498 /* create new sock for complete connection */ 1499 newsk = unix_create1(net, NULL, 0, sock->type); 1500 if (IS_ERR(newsk)) { 1501 err = PTR_ERR(newsk); 1502 newsk = NULL; 1503 goto out; 1504 } 1505 1506 err = -ENOMEM; 1507 1508 /* Allocate skb for sending to listening sock */ 1509 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1510 if (skb == NULL) 1511 goto out; 1512 1513 restart: 1514 /* Find listening sock. */ 1515 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1516 if (IS_ERR(other)) { 1517 err = PTR_ERR(other); 1518 other = NULL; 1519 goto out; 1520 } 1521 1522 /* Latch state of peer */ 1523 unix_state_lock(other); 1524 1525 /* Apparently VFS overslept socket death. Retry. 
*/ 1526 if (sock_flag(other, SOCK_DEAD)) { 1527 unix_state_unlock(other); 1528 sock_put(other); 1529 goto restart; 1530 } 1531 1532 err = -ECONNREFUSED; 1533 if (other->sk_state != TCP_LISTEN) 1534 goto out_unlock; 1535 if (other->sk_shutdown & RCV_SHUTDOWN) 1536 goto out_unlock; 1537 1538 if (unix_recvq_full(other)) { 1539 err = -EAGAIN; 1540 if (!timeo) 1541 goto out_unlock; 1542 1543 timeo = unix_wait_for_peer(other, timeo); 1544 1545 err = sock_intr_errno(timeo); 1546 if (signal_pending(current)) 1547 goto out; 1548 sock_put(other); 1549 goto restart; 1550 } 1551 1552 /* Latch our state. 1553 1554 It is tricky place. We need to grab our state lock and cannot 1555 drop lock on peer. It is dangerous because deadlock is 1556 possible. Connect to self case and simultaneous 1557 attempt to connect are eliminated by checking socket 1558 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1559 check this before attempt to grab lock. 1560 1561 Well, and we have to recheck the state after socket locked. 1562 */ 1563 st = sk->sk_state; 1564 1565 switch (st) { 1566 case TCP_CLOSE: 1567 /* This is ok... continue with connect */ 1568 break; 1569 case TCP_ESTABLISHED: 1570 /* Socket is already connected */ 1571 err = -EISCONN; 1572 goto out_unlock; 1573 default: 1574 err = -EINVAL; 1575 goto out_unlock; 1576 } 1577 1578 unix_state_lock_nested(sk); 1579 1580 if (sk->sk_state != st) { 1581 unix_state_unlock(sk); 1582 unix_state_unlock(other); 1583 sock_put(other); 1584 goto restart; 1585 } 1586 1587 err = security_unix_stream_connect(sk, other, newsk); 1588 if (err) { 1589 unix_state_unlock(sk); 1590 goto out_unlock; 1591 } 1592 1593 /* The way is open! Fastly set all the necessary fields... */ 1594 1595 sock_hold(sk); 1596 unix_peer(newsk) = sk; 1597 newsk->sk_state = TCP_ESTABLISHED; 1598 newsk->sk_type = sk->sk_type; 1599 init_peercred(newsk); 1600 newu = unix_sk(newsk); 1601 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1602 otheru = unix_sk(other); 1603 1604 /* copy address information from listening to new sock 1605 * 1606 * The contents of *(otheru->addr) and otheru->path 1607 * are seen fully set up here, since we have found 1608 * otheru in hash under its lock. Insertion into the 1609 * hash chain we'd found it in had been done in an 1610 * earlier critical area protected by the chain's lock, 1611 * the same one where we'd set *(otheru->addr) contents, 1612 * as well as otheru->path and otheru->addr itself. 1613 * 1614 * Using smp_store_release() here to set newu->addr 1615 * is enough to make those stores, as well as stores 1616 * to newu->path visible to anyone who gets newu->addr 1617 * by smp_load_acquire(). IOW, the same warranties 1618 * as for unix_sock instances bound in unix_bind() or 1619 * in unix_autobind(). 
1620 */ 1621 if (otheru->path.dentry) { 1622 path_get(&otheru->path); 1623 newu->path = otheru->path; 1624 } 1625 refcount_inc(&otheru->addr->refcnt); 1626 smp_store_release(&newu->addr, otheru->addr); 1627 1628 /* Set credentials */ 1629 copy_peercred(sk, other); 1630 1631 sock->state = SS_CONNECTED; 1632 sk->sk_state = TCP_ESTABLISHED; 1633 sock_hold(newsk); 1634 1635 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1636 unix_peer(sk) = newsk; 1637 1638 unix_state_unlock(sk); 1639 1640 /* take ten and send info to listening sock */ 1641 spin_lock(&other->sk_receive_queue.lock); 1642 __skb_queue_tail(&other->sk_receive_queue, skb); 1643 spin_unlock(&other->sk_receive_queue.lock); 1644 unix_state_unlock(other); 1645 other->sk_data_ready(other); 1646 sock_put(other); 1647 return 0; 1648 1649 out_unlock: 1650 if (other) 1651 unix_state_unlock(other); 1652 1653 out: 1654 kfree_skb(skb); 1655 if (newsk) 1656 unix_release_sock(newsk, 0); 1657 if (other) 1658 sock_put(other); 1659 return err; 1660 } 1661 1662 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1663 { 1664 struct sock *ska = socka->sk, *skb = sockb->sk; 1665 1666 /* Join our sockets back to back */ 1667 sock_hold(ska); 1668 sock_hold(skb); 1669 unix_peer(ska) = skb; 1670 unix_peer(skb) = ska; 1671 init_peercred(ska); 1672 init_peercred(skb); 1673 1674 ska->sk_state = TCP_ESTABLISHED; 1675 skb->sk_state = TCP_ESTABLISHED; 1676 socka->state = SS_CONNECTED; 1677 sockb->state = SS_CONNECTED; 1678 return 0; 1679 } 1680 1681 static void unix_sock_inherit_flags(const struct socket *old, 1682 struct socket *new) 1683 { 1684 if (test_bit(SOCK_PASSCRED, &old->flags)) 1685 set_bit(SOCK_PASSCRED, &new->flags); 1686 if (test_bit(SOCK_PASSPIDFD, &old->flags)) 1687 set_bit(SOCK_PASSPIDFD, &new->flags); 1688 if (test_bit(SOCK_PASSSEC, &old->flags)) 1689 set_bit(SOCK_PASSSEC, &new->flags); 1690 } 1691 1692 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1693 bool kern) 1694 { 1695 struct sock *sk = sock->sk; 1696 struct sock *tsk; 1697 struct sk_buff *skb; 1698 int err; 1699 1700 err = -EOPNOTSUPP; 1701 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1702 goto out; 1703 1704 err = -EINVAL; 1705 if (sk->sk_state != TCP_LISTEN) 1706 goto out; 1707 1708 /* If socket state is TCP_LISTEN it cannot change (for now...), 1709 * so that no locks are necessary. 1710 */ 1711 1712 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1713 &err); 1714 if (!skb) { 1715 /* This means receive shutdown. 
*/ 1716 if (err == 0) 1717 err = -EINVAL; 1718 goto out; 1719 } 1720 1721 tsk = skb->sk; 1722 skb_free_datagram(sk, skb); 1723 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1724 1725 /* attach accepted sock to socket */ 1726 unix_state_lock(tsk); 1727 newsock->state = SS_CONNECTED; 1728 unix_sock_inherit_flags(sock, newsock); 1729 sock_graft(tsk, newsock); 1730 unix_state_unlock(tsk); 1731 return 0; 1732 1733 out: 1734 return err; 1735 } 1736 1737 1738 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1739 { 1740 struct sock *sk = sock->sk; 1741 struct unix_address *addr; 1742 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1743 int err = 0; 1744 1745 if (peer) { 1746 sk = unix_peer_get(sk); 1747 1748 err = -ENOTCONN; 1749 if (!sk) 1750 goto out; 1751 err = 0; 1752 } else { 1753 sock_hold(sk); 1754 } 1755 1756 addr = smp_load_acquire(&unix_sk(sk)->addr); 1757 if (!addr) { 1758 sunaddr->sun_family = AF_UNIX; 1759 sunaddr->sun_path[0] = 0; 1760 err = offsetof(struct sockaddr_un, sun_path); 1761 } else { 1762 err = addr->len; 1763 memcpy(sunaddr, addr->name, addr->len); 1764 } 1765 sock_put(sk); 1766 out: 1767 return err; 1768 } 1769 1770 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1771 { 1772 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1773 1774 /* 1775 * Garbage collection of unix sockets starts by selecting a set of 1776 * candidate sockets which have reference only from being in flight 1777 * (total_refs == inflight_refs). This condition is checked once during 1778 * the candidate collection phase, and candidates are marked as such, so 1779 * that non-candidates can later be ignored. While inflight_refs is 1780 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1781 * is an instantaneous decision. 1782 * 1783 * Once a candidate, however, the socket must not be reinstalled into a 1784 * file descriptor while the garbage collection is in progress. 1785 * 1786 * If the above conditions are met, then the directed graph of 1787 * candidates (*) does not change while unix_gc_lock is held. 1788 * 1789 * Any operations that changes the file count through file descriptors 1790 * (dup, close, sendmsg) does not change the graph since candidates are 1791 * not installed in fds. 1792 * 1793 * Dequeing a candidate via recvmsg would install it into an fd, but 1794 * that takes unix_gc_lock to decrement the inflight count, so it's 1795 * serialized with garbage collection. 1796 * 1797 * MSG_PEEK is special in that it does not change the inflight count, 1798 * yet does install the socket into an fd. The following lock/unlock 1799 * pair is to ensure serialization with garbage collection. It must be 1800 * done between incrementing the file count and installing the file into 1801 * an fd. 1802 * 1803 * If garbage collection starts after the barrier provided by the 1804 * lock/unlock, then it will see the elevated refcount and not mark this 1805 * as a candidate. If a garbage collection is already in progress 1806 * before the file count was incremented, then the lock/unlock pair will 1807 * ensure that garbage collection is finished before progressing to 1808 * installing the fd. 1809 * 1810 * (*) A -> B where B is on the queue of A or B is on the queue of C 1811 * which is on the queue of listening socket A. 
1812 */ 1813 spin_lock(&unix_gc_lock); 1814 spin_unlock(&unix_gc_lock); 1815 } 1816 1817 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1818 { 1819 int err = 0; 1820 1821 UNIXCB(skb).pid = get_pid(scm->pid); 1822 UNIXCB(skb).uid = scm->creds.uid; 1823 UNIXCB(skb).gid = scm->creds.gid; 1824 UNIXCB(skb).fp = NULL; 1825 unix_get_secdata(scm, skb); 1826 if (scm->fp && send_fds) 1827 err = unix_attach_fds(scm, skb); 1828 1829 skb->destructor = unix_destruct_scm; 1830 return err; 1831 } 1832 1833 static bool unix_passcred_enabled(const struct socket *sock, 1834 const struct sock *other) 1835 { 1836 return test_bit(SOCK_PASSCRED, &sock->flags) || 1837 test_bit(SOCK_PASSPIDFD, &sock->flags) || 1838 !other->sk_socket || 1839 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || 1840 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); 1841 } 1842 1843 /* 1844 * Some apps rely on write() giving SCM_CREDENTIALS 1845 * We include credentials if source or destination socket 1846 * asserted SOCK_PASSCRED. 1847 */ 1848 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1849 const struct sock *other) 1850 { 1851 if (UNIXCB(skb).pid) 1852 return; 1853 if (unix_passcred_enabled(sock, other)) { 1854 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1855 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1856 } 1857 } 1858 1859 static bool unix_skb_scm_eq(struct sk_buff *skb, 1860 struct scm_cookie *scm) 1861 { 1862 return UNIXCB(skb).pid == scm->pid && 1863 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1864 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1865 unix_secdata_eq(scm, skb); 1866 } 1867 1868 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1869 { 1870 struct scm_fp_list *fp = UNIXCB(skb).fp; 1871 struct unix_sock *u = unix_sk(sk); 1872 1873 if (unlikely(fp && fp->count)) 1874 atomic_add(fp->count, &u->scm_stat.nr_fds); 1875 } 1876 1877 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1878 { 1879 struct scm_fp_list *fp = UNIXCB(skb).fp; 1880 struct unix_sock *u = unix_sk(sk); 1881 1882 if (unlikely(fp && fp->count)) 1883 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1884 } 1885 1886 /* 1887 * Send AF_UNIX data. 
1888 */ 1889 1890 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1891 size_t len) 1892 { 1893 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1894 struct sock *sk = sock->sk, *other = NULL; 1895 struct unix_sock *u = unix_sk(sk); 1896 struct scm_cookie scm; 1897 struct sk_buff *skb; 1898 int data_len = 0; 1899 int sk_locked; 1900 long timeo; 1901 int err; 1902 1903 wait_for_unix_gc(); 1904 err = scm_send(sock, msg, &scm, false); 1905 if (err < 0) 1906 return err; 1907 1908 err = -EOPNOTSUPP; 1909 if (msg->msg_flags&MSG_OOB) 1910 goto out; 1911 1912 if (msg->msg_namelen) { 1913 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1914 if (err) 1915 goto out; 1916 } else { 1917 sunaddr = NULL; 1918 err = -ENOTCONN; 1919 other = unix_peer_get(sk); 1920 if (!other) 1921 goto out; 1922 } 1923 1924 if ((test_bit(SOCK_PASSCRED, &sock->flags) || 1925 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { 1926 err = unix_autobind(sk); 1927 if (err) 1928 goto out; 1929 } 1930 1931 err = -EMSGSIZE; 1932 if (len > sk->sk_sndbuf - 32) 1933 goto out; 1934 1935 if (len > SKB_MAX_ALLOC) { 1936 data_len = min_t(size_t, 1937 len - SKB_MAX_ALLOC, 1938 MAX_SKB_FRAGS * PAGE_SIZE); 1939 data_len = PAGE_ALIGN(data_len); 1940 1941 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1942 } 1943 1944 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1945 msg->msg_flags & MSG_DONTWAIT, &err, 1946 PAGE_ALLOC_COSTLY_ORDER); 1947 if (skb == NULL) 1948 goto out; 1949 1950 err = unix_scm_to_skb(&scm, skb, true); 1951 if (err < 0) 1952 goto out_free; 1953 1954 skb_put(skb, len - data_len); 1955 skb->data_len = data_len; 1956 skb->len = len; 1957 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1958 if (err) 1959 goto out_free; 1960 1961 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1962 1963 restart: 1964 if (!other) { 1965 err = -ECONNRESET; 1966 if (sunaddr == NULL) 1967 goto out_free; 1968 1969 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1970 sk->sk_type); 1971 if (IS_ERR(other)) { 1972 err = PTR_ERR(other); 1973 other = NULL; 1974 goto out_free; 1975 } 1976 } 1977 1978 if (sk_filter(other, skb) < 0) { 1979 /* Toss the packet but do not return any error to the sender */ 1980 err = len; 1981 goto out_free; 1982 } 1983 1984 sk_locked = 0; 1985 unix_state_lock(other); 1986 restart_locked: 1987 err = -EPERM; 1988 if (!unix_may_send(sk, other)) 1989 goto out_unlock; 1990 1991 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1992 /* 1993 * Check with 1003.1g - what should 1994 * datagram error 1995 */ 1996 unix_state_unlock(other); 1997 sock_put(other); 1998 1999 if (!sk_locked) 2000 unix_state_lock(sk); 2001 2002 err = 0; 2003 if (sk->sk_type == SOCK_SEQPACKET) { 2004 /* We are here only when racing with unix_release_sock() 2005 * is clearing @other. Never change state to TCP_CLOSE 2006 * unlike SOCK_DGRAM wants. 
2007 */ 2008 unix_state_unlock(sk); 2009 err = -EPIPE; 2010 } else if (unix_peer(sk) == other) { 2011 unix_peer(sk) = NULL; 2012 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2013 2014 sk->sk_state = TCP_CLOSE; 2015 unix_state_unlock(sk); 2016 2017 unix_dgram_disconnected(sk, other); 2018 sock_put(other); 2019 err = -ECONNREFUSED; 2020 } else { 2021 unix_state_unlock(sk); 2022 } 2023 2024 other = NULL; 2025 if (err) 2026 goto out_free; 2027 goto restart; 2028 } 2029 2030 err = -EPIPE; 2031 if (other->sk_shutdown & RCV_SHUTDOWN) 2032 goto out_unlock; 2033 2034 if (sk->sk_type != SOCK_SEQPACKET) { 2035 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2036 if (err) 2037 goto out_unlock; 2038 } 2039 2040 /* other == sk && unix_peer(other) != sk if 2041 * - unix_peer(sk) == NULL, destination address bound to sk 2042 * - unix_peer(sk) == sk by time of get but disconnected before lock 2043 */ 2044 if (other != sk && 2045 unlikely(unix_peer(other) != sk && 2046 unix_recvq_full_lockless(other))) { 2047 if (timeo) { 2048 timeo = unix_wait_for_peer(other, timeo); 2049 2050 err = sock_intr_errno(timeo); 2051 if (signal_pending(current)) 2052 goto out_free; 2053 2054 goto restart; 2055 } 2056 2057 if (!sk_locked) { 2058 unix_state_unlock(other); 2059 unix_state_double_lock(sk, other); 2060 } 2061 2062 if (unix_peer(sk) != other || 2063 unix_dgram_peer_wake_me(sk, other)) { 2064 err = -EAGAIN; 2065 sk_locked = 1; 2066 goto out_unlock; 2067 } 2068 2069 if (!sk_locked) { 2070 sk_locked = 1; 2071 goto restart_locked; 2072 } 2073 } 2074 2075 if (unlikely(sk_locked)) 2076 unix_state_unlock(sk); 2077 2078 if (sock_flag(other, SOCK_RCVTSTAMP)) 2079 __net_timestamp(skb); 2080 maybe_add_creds(skb, sock, other); 2081 scm_stat_add(other, skb); 2082 skb_queue_tail(&other->sk_receive_queue, skb); 2083 unix_state_unlock(other); 2084 other->sk_data_ready(other); 2085 sock_put(other); 2086 scm_destroy(&scm); 2087 return len; 2088 2089 out_unlock: 2090 if (sk_locked) 2091 unix_state_unlock(sk); 2092 unix_state_unlock(other); 2093 out_free: 2094 kfree_skb(skb); 2095 out: 2096 if (other) 2097 sock_put(other); 2098 scm_destroy(&scm); 2099 return err; 2100 } 2101 2102 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2103 * bytes, and a minimum of a full page. 
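 * UNIX_SKB_FRAGS_SZ below is 32768 rounded up to a whole number of
 * pages (exactly 32 KiB with 4 KiB pages, a single page on
 * architectures with larger pages); unix_stream_sendmsg() passes
 * get_order(UNIX_SKB_FRAGS_SZ) to sock_alloc_send_pskb() as the
 * maximum page order for the skb's paged part.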
2104 */ 2105 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2106 2107 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2108 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, 2109 struct scm_cookie *scm, bool fds_sent) 2110 { 2111 struct unix_sock *ousk = unix_sk(other); 2112 struct sk_buff *skb; 2113 int err = 0; 2114 2115 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2116 2117 if (!skb) 2118 return err; 2119 2120 err = unix_scm_to_skb(scm, skb, !fds_sent); 2121 if (err < 0) { 2122 kfree_skb(skb); 2123 return err; 2124 } 2125 skb_put(skb, 1); 2126 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2127 2128 if (err) { 2129 kfree_skb(skb); 2130 return err; 2131 } 2132 2133 unix_state_lock(other); 2134 2135 if (sock_flag(other, SOCK_DEAD) || 2136 (other->sk_shutdown & RCV_SHUTDOWN)) { 2137 unix_state_unlock(other); 2138 kfree_skb(skb); 2139 return -EPIPE; 2140 } 2141 2142 maybe_add_creds(skb, sock, other); 2143 skb_get(skb); 2144 2145 if (ousk->oob_skb) 2146 consume_skb(ousk->oob_skb); 2147 2148 WRITE_ONCE(ousk->oob_skb, skb); 2149 2150 scm_stat_add(other, skb); 2151 skb_queue_tail(&other->sk_receive_queue, skb); 2152 sk_send_sigurg(other); 2153 unix_state_unlock(other); 2154 other->sk_data_ready(other); 2155 2156 return err; 2157 } 2158 #endif 2159 2160 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2161 size_t len) 2162 { 2163 struct sock *sk = sock->sk; 2164 struct sock *other = NULL; 2165 int err, size; 2166 struct sk_buff *skb; 2167 int sent = 0; 2168 struct scm_cookie scm; 2169 bool fds_sent = false; 2170 int data_len; 2171 2172 wait_for_unix_gc(); 2173 err = scm_send(sock, msg, &scm, false); 2174 if (err < 0) 2175 return err; 2176 2177 err = -EOPNOTSUPP; 2178 if (msg->msg_flags & MSG_OOB) { 2179 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2180 if (len) 2181 len--; 2182 else 2183 #endif 2184 goto out_err; 2185 } 2186 2187 if (msg->msg_namelen) { 2188 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2189 goto out_err; 2190 } else { 2191 err = -ENOTCONN; 2192 other = unix_peer(sk); 2193 if (!other) 2194 goto out_err; 2195 } 2196 2197 if (sk->sk_shutdown & SEND_SHUTDOWN) 2198 goto pipe_err; 2199 2200 while (sent < len) { 2201 size = len - sent; 2202 2203 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2204 skb = sock_alloc_send_pskb(sk, 0, 0, 2205 msg->msg_flags & MSG_DONTWAIT, 2206 &err, 0); 2207 } else { 2208 /* Keep two messages in the pipe so it schedules better */ 2209 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2210 2211 /* allow fallback to order-0 allocations */ 2212 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2213 2214 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2215 2216 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2217 2218 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2219 msg->msg_flags & MSG_DONTWAIT, &err, 2220 get_order(UNIX_SKB_FRAGS_SZ)); 2221 } 2222 if (!skb) 2223 goto out_err; 2224 2225 /* Only send the fds in the first buffer */ 2226 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2227 if (err < 0) { 2228 kfree_skb(skb); 2229 goto out_err; 2230 } 2231 fds_sent = true; 2232 2233 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { 2234 err = skb_splice_from_iter(skb, &msg->msg_iter, size, 2235 sk->sk_allocation); 2236 if (err < 0) { 2237 kfree_skb(skb); 2238 goto out_err; 2239 } 2240 size = err; 2241 refcount_add(size, &sk->sk_wmem_alloc); 2242 } else { 2243 skb_put(skb, size - data_len); 2244 skb->data_len = data_len; 2245 skb->len = size; 2246 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2247 if (err) { 2248 kfree_skb(skb); 2249 goto out_err; 2250 } 2251 } 2252 2253 unix_state_lock(other); 2254 2255 if (sock_flag(other, SOCK_DEAD) || 2256 (other->sk_shutdown & RCV_SHUTDOWN)) 2257 goto pipe_err_free; 2258 2259 maybe_add_creds(skb, sock, other); 2260 scm_stat_add(other, skb); 2261 skb_queue_tail(&other->sk_receive_queue, skb); 2262 unix_state_unlock(other); 2263 other->sk_data_ready(other); 2264 sent += size; 2265 } 2266 2267 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2268 if (msg->msg_flags & MSG_OOB) { 2269 err = queue_oob(sock, msg, other, &scm, fds_sent); 2270 if (err) 2271 goto out_err; 2272 sent++; 2273 } 2274 #endif 2275 2276 scm_destroy(&scm); 2277 2278 return sent; 2279 2280 pipe_err_free: 2281 unix_state_unlock(other); 2282 kfree_skb(skb); 2283 pipe_err: 2284 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2285 send_sig(SIGPIPE, current, 0); 2286 err = -EPIPE; 2287 out_err: 2288 scm_destroy(&scm); 2289 return sent ? 
: err; 2290 } 2291 2292 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2293 size_t len) 2294 { 2295 int err; 2296 struct sock *sk = sock->sk; 2297 2298 err = sock_error(sk); 2299 if (err) 2300 return err; 2301 2302 if (sk->sk_state != TCP_ESTABLISHED) 2303 return -ENOTCONN; 2304 2305 if (msg->msg_namelen) 2306 msg->msg_namelen = 0; 2307 2308 return unix_dgram_sendmsg(sock, msg, len); 2309 } 2310 2311 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2312 size_t size, int flags) 2313 { 2314 struct sock *sk = sock->sk; 2315 2316 if (sk->sk_state != TCP_ESTABLISHED) 2317 return -ENOTCONN; 2318 2319 return unix_dgram_recvmsg(sock, msg, size, flags); 2320 } 2321 2322 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2323 { 2324 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2325 2326 if (addr) { 2327 msg->msg_namelen = addr->len; 2328 memcpy(msg->msg_name, addr->name, addr->len); 2329 } 2330 } 2331 2332 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2333 int flags) 2334 { 2335 struct scm_cookie scm; 2336 struct socket *sock = sk->sk_socket; 2337 struct unix_sock *u = unix_sk(sk); 2338 struct sk_buff *skb, *last; 2339 long timeo; 2340 int skip; 2341 int err; 2342 2343 err = -EOPNOTSUPP; 2344 if (flags&MSG_OOB) 2345 goto out; 2346 2347 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2348 2349 do { 2350 mutex_lock(&u->iolock); 2351 2352 skip = sk_peek_offset(sk, flags); 2353 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2354 &skip, &err, &last); 2355 if (skb) { 2356 if (!(flags & MSG_PEEK)) 2357 scm_stat_del(sk, skb); 2358 break; 2359 } 2360 2361 mutex_unlock(&u->iolock); 2362 2363 if (err != -EAGAIN) 2364 break; 2365 } while (timeo && 2366 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2367 &err, &timeo, last)); 2368 2369 if (!skb) { /* implies iolock unlocked */ 2370 unix_state_lock(sk); 2371 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2372 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2373 (sk->sk_shutdown & RCV_SHUTDOWN)) 2374 err = 0; 2375 unix_state_unlock(sk); 2376 goto out; 2377 } 2378 2379 if (wq_has_sleeper(&u->peer_wait)) 2380 wake_up_interruptible_sync_poll(&u->peer_wait, 2381 EPOLLOUT | EPOLLWRNORM | 2382 EPOLLWRBAND); 2383 2384 if (msg->msg_name) 2385 unix_copy_addr(msg, skb->sk); 2386 2387 if (size > skb->len - skip) 2388 size = skb->len - skip; 2389 else if (size < skb->len - skip) 2390 msg->msg_flags |= MSG_TRUNC; 2391 2392 err = skb_copy_datagram_msg(skb, skip, msg, size); 2393 if (err) 2394 goto out_free; 2395 2396 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2397 __sock_recv_timestamp(msg, sk, skb); 2398 2399 memset(&scm, 0, sizeof(scm)); 2400 2401 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2402 unix_set_secdata(&scm, skb); 2403 2404 if (!(flags & MSG_PEEK)) { 2405 if (UNIXCB(skb).fp) 2406 unix_detach_fds(&scm, skb); 2407 2408 sk_peek_offset_bwd(sk, skb->len); 2409 } else { 2410 /* It is questionable: on PEEK we could: 2411 - do not return fds - good, but too simple 8) 2412 - return fds, and do not return them on read (old strategy, 2413 apparently wrong) 2414 - clone fds (I chose it for now, it is the most universal 2415 solution) 2416 2417 POSIX 1003.1g does not actually define this clearly 2418 at all. POSIX 1003.1g doesn't define a lot of things 2419 clearly however! 
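	   The "clone" option is what unix_peek_fds() below implements:
	   a peeking reader gets its own references to the passed files
	   while the originals stay attached to the queued skb.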
2420 2421 */ 2422 2423 sk_peek_offset_fwd(sk, size); 2424 2425 if (UNIXCB(skb).fp) 2426 unix_peek_fds(&scm, skb); 2427 } 2428 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2429 2430 scm_recv(sock, msg, &scm, flags); 2431 2432 out_free: 2433 skb_free_datagram(sk, skb); 2434 mutex_unlock(&u->iolock); 2435 out: 2436 return err; 2437 } 2438 2439 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2440 int flags) 2441 { 2442 struct sock *sk = sock->sk; 2443 2444 #ifdef CONFIG_BPF_SYSCALL 2445 const struct proto *prot = READ_ONCE(sk->sk_prot); 2446 2447 if (prot != &unix_dgram_proto) 2448 return prot->recvmsg(sk, msg, size, flags, NULL); 2449 #endif 2450 return __unix_dgram_recvmsg(sk, msg, size, flags); 2451 } 2452 2453 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2454 { 2455 struct unix_sock *u = unix_sk(sk); 2456 struct sk_buff *skb; 2457 int err; 2458 2459 mutex_lock(&u->iolock); 2460 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2461 mutex_unlock(&u->iolock); 2462 if (!skb) 2463 return err; 2464 2465 return recv_actor(sk, skb); 2466 } 2467 2468 /* 2469 * Sleep until more data has arrived. But check for races.. 2470 */ 2471 static long unix_stream_data_wait(struct sock *sk, long timeo, 2472 struct sk_buff *last, unsigned int last_len, 2473 bool freezable) 2474 { 2475 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; 2476 struct sk_buff *tail; 2477 DEFINE_WAIT(wait); 2478 2479 unix_state_lock(sk); 2480 2481 for (;;) { 2482 prepare_to_wait(sk_sleep(sk), &wait, state); 2483 2484 tail = skb_peek_tail(&sk->sk_receive_queue); 2485 if (tail != last || 2486 (tail && tail->len != last_len) || 2487 sk->sk_err || 2488 (sk->sk_shutdown & RCV_SHUTDOWN) || 2489 signal_pending(current) || 2490 !timeo) 2491 break; 2492 2493 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2494 unix_state_unlock(sk); 2495 timeo = schedule_timeout(timeo); 2496 unix_state_lock(sk); 2497 2498 if (sock_flag(sk, SOCK_DEAD)) 2499 break; 2500 2501 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2502 } 2503 2504 finish_wait(sk_sleep(sk), &wait); 2505 unix_state_unlock(sk); 2506 return timeo; 2507 } 2508 2509 static unsigned int unix_skb_len(const struct sk_buff *skb) 2510 { 2511 return skb->len - UNIXCB(skb).consumed; 2512 } 2513 2514 struct unix_stream_read_state { 2515 int (*recv_actor)(struct sk_buff *, int, int, 2516 struct unix_stream_read_state *); 2517 struct socket *socket; 2518 struct msghdr *msg; 2519 struct pipe_inode_info *pipe; 2520 size_t size; 2521 int flags; 2522 unsigned int splice_flags; 2523 }; 2524 2525 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2526 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2527 { 2528 struct socket *sock = state->socket; 2529 struct sock *sk = sock->sk; 2530 struct unix_sock *u = unix_sk(sk); 2531 int chunk = 1; 2532 struct sk_buff *oob_skb; 2533 2534 mutex_lock(&u->iolock); 2535 unix_state_lock(sk); 2536 2537 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2538 unix_state_unlock(sk); 2539 mutex_unlock(&u->iolock); 2540 return -EINVAL; 2541 } 2542 2543 oob_skb = u->oob_skb; 2544 2545 if (!(state->flags & MSG_PEEK)) 2546 WRITE_ONCE(u->oob_skb, NULL); 2547 2548 unix_state_unlock(sk); 2549 2550 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2551 2552 if (!(state->flags & MSG_PEEK)) { 2553 UNIXCB(oob_skb).consumed += 1; 2554 kfree_skb(oob_skb); 2555 } 2556 2557 mutex_unlock(&u->iolock); 2558 2559 if (chunk < 0) 2560 return -EFAULT; 2561 2562 state->msg->msg_flags |= MSG_OOB; 2563 return 1; 2564 } 
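/* Called from unix_stream_read_generic() under unix_state_lock(sk) to
 * decide which skb a stream read may use while an out-of-band byte is
 * pending:
 *
 *  - a fully consumed skb is unlinked and freed (unless peeking) and
 *    NULL is returned;
 *  - if @skb is the pending OOB skb and some bytes were already
 *    copied, NULL is returned so this read stops short of the mark;
 *  - with SOCK_URGINLINE the OOB byte is handed out inline (clearing
 *    oob_skb unless peeking);
 *  - otherwise, when not peeking, the OOB skb is dropped from the
 *    receive queue and the next queued skb, if any, is returned.
 */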
2565 2566 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2567 int flags, int copied) 2568 { 2569 struct unix_sock *u = unix_sk(sk); 2570 2571 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2572 skb_unlink(skb, &sk->sk_receive_queue); 2573 consume_skb(skb); 2574 skb = NULL; 2575 } else { 2576 if (skb == u->oob_skb) { 2577 if (copied) { 2578 skb = NULL; 2579 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2580 if (!(flags & MSG_PEEK)) { 2581 WRITE_ONCE(u->oob_skb, NULL); 2582 consume_skb(skb); 2583 } 2584 } else if (!(flags & MSG_PEEK)) { 2585 skb_unlink(skb, &sk->sk_receive_queue); 2586 consume_skb(skb); 2587 skb = skb_peek(&sk->sk_receive_queue); 2588 } 2589 } 2590 } 2591 return skb; 2592 } 2593 #endif 2594 2595 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2596 { 2597 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2598 return -ENOTCONN; 2599 2600 return unix_read_skb(sk, recv_actor); 2601 } 2602 2603 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2604 bool freezable) 2605 { 2606 struct scm_cookie scm; 2607 struct socket *sock = state->socket; 2608 struct sock *sk = sock->sk; 2609 struct unix_sock *u = unix_sk(sk); 2610 int copied = 0; 2611 int flags = state->flags; 2612 int noblock = flags & MSG_DONTWAIT; 2613 bool check_creds = false; 2614 int target; 2615 int err = 0; 2616 long timeo; 2617 int skip; 2618 size_t size = state->size; 2619 unsigned int last_len; 2620 2621 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2622 err = -EINVAL; 2623 goto out; 2624 } 2625 2626 if (unlikely(flags & MSG_OOB)) { 2627 err = -EOPNOTSUPP; 2628 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2629 err = unix_stream_recv_urg(state); 2630 #endif 2631 goto out; 2632 } 2633 2634 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2635 timeo = sock_rcvtimeo(sk, noblock); 2636 2637 memset(&scm, 0, sizeof(scm)); 2638 2639 /* Lock the socket to prevent queue disordering 2640 * while sleeps in memcpy_tomsg 2641 */ 2642 mutex_lock(&u->iolock); 2643 2644 skip = max(sk_peek_offset(sk, flags), 0); 2645 2646 do { 2647 int chunk; 2648 bool drop_skb; 2649 struct sk_buff *skb, *last; 2650 2651 redo: 2652 unix_state_lock(sk); 2653 if (sock_flag(sk, SOCK_DEAD)) { 2654 err = -ECONNRESET; 2655 goto unlock; 2656 } 2657 last = skb = skb_peek(&sk->sk_receive_queue); 2658 last_len = last ? last->len : 0; 2659 2660 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2661 if (skb) { 2662 skb = manage_oob(skb, sk, flags, copied); 2663 if (!skb) { 2664 unix_state_unlock(sk); 2665 if (copied) 2666 break; 2667 goto redo; 2668 } 2669 } 2670 #endif 2671 again: 2672 if (skb == NULL) { 2673 if (copied >= target) 2674 goto unlock; 2675 2676 /* 2677 * POSIX 1003.1g mandates this order. 
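 * A pending socket error has to be reported before the EOF implied
 * by RCV_SHUTDOWN, hence sock_error() is checked first below.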
2678 */ 2679 2680 err = sock_error(sk); 2681 if (err) 2682 goto unlock; 2683 if (sk->sk_shutdown & RCV_SHUTDOWN) 2684 goto unlock; 2685 2686 unix_state_unlock(sk); 2687 if (!timeo) { 2688 err = -EAGAIN; 2689 break; 2690 } 2691 2692 mutex_unlock(&u->iolock); 2693 2694 timeo = unix_stream_data_wait(sk, timeo, last, 2695 last_len, freezable); 2696 2697 if (signal_pending(current)) { 2698 err = sock_intr_errno(timeo); 2699 scm_destroy(&scm); 2700 goto out; 2701 } 2702 2703 mutex_lock(&u->iolock); 2704 goto redo; 2705 unlock: 2706 unix_state_unlock(sk); 2707 break; 2708 } 2709 2710 while (skip >= unix_skb_len(skb)) { 2711 skip -= unix_skb_len(skb); 2712 last = skb; 2713 last_len = skb->len; 2714 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2715 if (!skb) 2716 goto again; 2717 } 2718 2719 unix_state_unlock(sk); 2720 2721 if (check_creds) { 2722 /* Never glue messages from different writers */ 2723 if (!unix_skb_scm_eq(skb, &scm)) 2724 break; 2725 } else if (test_bit(SOCK_PASSCRED, &sock->flags) || 2726 test_bit(SOCK_PASSPIDFD, &sock->flags)) { 2727 /* Copy credentials */ 2728 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2729 unix_set_secdata(&scm, skb); 2730 check_creds = true; 2731 } 2732 2733 /* Copy address just once */ 2734 if (state->msg && state->msg->msg_name) { 2735 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2736 state->msg->msg_name); 2737 unix_copy_addr(state->msg, skb->sk); 2738 sunaddr = NULL; 2739 } 2740 2741 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2742 skb_get(skb); 2743 chunk = state->recv_actor(skb, skip, chunk, state); 2744 drop_skb = !unix_skb_len(skb); 2745 /* skb is only safe to use if !drop_skb */ 2746 consume_skb(skb); 2747 if (chunk < 0) { 2748 if (copied == 0) 2749 copied = -EFAULT; 2750 break; 2751 } 2752 copied += chunk; 2753 size -= chunk; 2754 2755 if (drop_skb) { 2756 /* the skb was touched by a concurrent reader; 2757 * we should not expect anything from this skb 2758 * anymore and assume it invalid - we can be 2759 * sure it was dropped from the socket queue 2760 * 2761 * let's report a short read 2762 */ 2763 err = 0; 2764 break; 2765 } 2766 2767 /* Mark read part of skb as used */ 2768 if (!(flags & MSG_PEEK)) { 2769 UNIXCB(skb).consumed += chunk; 2770 2771 sk_peek_offset_bwd(sk, chunk); 2772 2773 if (UNIXCB(skb).fp) { 2774 scm_stat_del(sk, skb); 2775 unix_detach_fds(&scm, skb); 2776 } 2777 2778 if (unix_skb_len(skb)) 2779 break; 2780 2781 skb_unlink(skb, &sk->sk_receive_queue); 2782 consume_skb(skb); 2783 2784 if (scm.fp) 2785 break; 2786 } else { 2787 /* It is questionable, see note in unix_dgram_recvmsg. 2788 */ 2789 if (UNIXCB(skb).fp) 2790 unix_peek_fds(&scm, skb); 2791 2792 sk_peek_offset_fwd(sk, chunk); 2793 2794 if (UNIXCB(skb).fp) 2795 break; 2796 2797 skip = 0; 2798 last = skb; 2799 last_len = skb->len; 2800 unix_state_lock(sk); 2801 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2802 if (skb) 2803 goto again; 2804 unix_state_unlock(sk); 2805 break; 2806 } 2807 } while (size); 2808 2809 mutex_unlock(&u->iolock); 2810 if (state->msg && check_creds) 2811 scm_recv(sock, state->msg, &scm, flags); 2812 else 2813 scm_destroy(&scm); 2814 out: 2815 return copied ? 
: err; 2816 } 2817 2818 static int unix_stream_read_actor(struct sk_buff *skb, 2819 int skip, int chunk, 2820 struct unix_stream_read_state *state) 2821 { 2822 int ret; 2823 2824 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2825 state->msg, chunk); 2826 return ret ?: chunk; 2827 } 2828 2829 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2830 size_t size, int flags) 2831 { 2832 struct unix_stream_read_state state = { 2833 .recv_actor = unix_stream_read_actor, 2834 .socket = sk->sk_socket, 2835 .msg = msg, 2836 .size = size, 2837 .flags = flags 2838 }; 2839 2840 return unix_stream_read_generic(&state, true); 2841 } 2842 2843 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2844 size_t size, int flags) 2845 { 2846 struct unix_stream_read_state state = { 2847 .recv_actor = unix_stream_read_actor, 2848 .socket = sock, 2849 .msg = msg, 2850 .size = size, 2851 .flags = flags 2852 }; 2853 2854 #ifdef CONFIG_BPF_SYSCALL 2855 struct sock *sk = sock->sk; 2856 const struct proto *prot = READ_ONCE(sk->sk_prot); 2857 2858 if (prot != &unix_stream_proto) 2859 return prot->recvmsg(sk, msg, size, flags, NULL); 2860 #endif 2861 return unix_stream_read_generic(&state, true); 2862 } 2863 2864 static int unix_stream_splice_actor(struct sk_buff *skb, 2865 int skip, int chunk, 2866 struct unix_stream_read_state *state) 2867 { 2868 return skb_splice_bits(skb, state->socket->sk, 2869 UNIXCB(skb).consumed + skip, 2870 state->pipe, chunk, state->splice_flags); 2871 } 2872 2873 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2874 struct pipe_inode_info *pipe, 2875 size_t size, unsigned int flags) 2876 { 2877 struct unix_stream_read_state state = { 2878 .recv_actor = unix_stream_splice_actor, 2879 .socket = sock, 2880 .pipe = pipe, 2881 .size = size, 2882 .splice_flags = flags, 2883 }; 2884 2885 if (unlikely(*ppos)) 2886 return -ESPIPE; 2887 2888 if (sock->file->f_flags & O_NONBLOCK || 2889 flags & SPLICE_F_NONBLOCK) 2890 state.flags = MSG_DONTWAIT; 2891 2892 return unix_stream_read_generic(&state, false); 2893 } 2894 2895 static int unix_shutdown(struct socket *sock, int mode) 2896 { 2897 struct sock *sk = sock->sk; 2898 struct sock *other; 2899 2900 if (mode < SHUT_RD || mode > SHUT_RDWR) 2901 return -EINVAL; 2902 /* This maps: 2903 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2904 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 2905 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2906 */ 2907 ++mode; 2908 2909 unix_state_lock(sk); 2910 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); 2911 other = unix_peer(sk); 2912 if (other) 2913 sock_hold(other); 2914 unix_state_unlock(sk); 2915 sk->sk_state_change(sk); 2916 2917 if (other && 2918 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2919 2920 int peer_mode = 0; 2921 const struct proto *prot = READ_ONCE(other->sk_prot); 2922 2923 if (prot->unhash) 2924 prot->unhash(other); 2925 if (mode&RCV_SHUTDOWN) 2926 peer_mode |= SEND_SHUTDOWN; 2927 if (mode&SEND_SHUTDOWN) 2928 peer_mode |= RCV_SHUTDOWN; 2929 unix_state_lock(other); 2930 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); 2931 unix_state_unlock(other); 2932 other->sk_state_change(other); 2933 if (peer_mode == SHUTDOWN_MASK) 2934 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2935 else if (peer_mode & RCV_SHUTDOWN) 2936 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2937 } 2938 if (other) 2939 sock_put(other); 2940 2941 return 0; 2942 } 2943 2944 long unix_inq_len(struct sock *sk) 2945 { 2946 struct sk_buff *skb; 2947 long amount = 
0; 2948 2949 if (sk->sk_state == TCP_LISTEN) 2950 return -EINVAL; 2951 2952 spin_lock(&sk->sk_receive_queue.lock); 2953 if (sk->sk_type == SOCK_STREAM || 2954 sk->sk_type == SOCK_SEQPACKET) { 2955 skb_queue_walk(&sk->sk_receive_queue, skb) 2956 amount += unix_skb_len(skb); 2957 } else { 2958 skb = skb_peek(&sk->sk_receive_queue); 2959 if (skb) 2960 amount = skb->len; 2961 } 2962 spin_unlock(&sk->sk_receive_queue.lock); 2963 2964 return amount; 2965 } 2966 EXPORT_SYMBOL_GPL(unix_inq_len); 2967 2968 long unix_outq_len(struct sock *sk) 2969 { 2970 return sk_wmem_alloc_get(sk); 2971 } 2972 EXPORT_SYMBOL_GPL(unix_outq_len); 2973 2974 static int unix_open_file(struct sock *sk) 2975 { 2976 struct path path; 2977 struct file *f; 2978 int fd; 2979 2980 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2981 return -EPERM; 2982 2983 if (!smp_load_acquire(&unix_sk(sk)->addr)) 2984 return -ENOENT; 2985 2986 path = unix_sk(sk)->path; 2987 if (!path.dentry) 2988 return -ENOENT; 2989 2990 path_get(&path); 2991 2992 fd = get_unused_fd_flags(O_CLOEXEC); 2993 if (fd < 0) 2994 goto out; 2995 2996 f = dentry_open(&path, O_PATH, current_cred()); 2997 if (IS_ERR(f)) { 2998 put_unused_fd(fd); 2999 fd = PTR_ERR(f); 3000 goto out; 3001 } 3002 3003 fd_install(fd, f); 3004 out: 3005 path_put(&path); 3006 3007 return fd; 3008 } 3009 3010 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3011 { 3012 struct sock *sk = sock->sk; 3013 long amount = 0; 3014 int err; 3015 3016 switch (cmd) { 3017 case SIOCOUTQ: 3018 amount = unix_outq_len(sk); 3019 err = put_user(amount, (int __user *)arg); 3020 break; 3021 case SIOCINQ: 3022 amount = unix_inq_len(sk); 3023 if (amount < 0) 3024 err = amount; 3025 else 3026 err = put_user(amount, (int __user *)arg); 3027 break; 3028 case SIOCUNIXFILE: 3029 err = unix_open_file(sk); 3030 break; 3031 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3032 case SIOCATMARK: 3033 { 3034 struct sk_buff *skb; 3035 int answ = 0; 3036 3037 skb = skb_peek(&sk->sk_receive_queue); 3038 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3039 answ = 1; 3040 err = put_user(answ, (int __user *)arg); 3041 } 3042 break; 3043 #endif 3044 default: 3045 err = -ENOIOCTLCMD; 3046 break; 3047 } 3048 return err; 3049 } 3050 3051 #ifdef CONFIG_COMPAT 3052 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3053 { 3054 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3055 } 3056 #endif 3057 3058 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3059 { 3060 struct sock *sk = sock->sk; 3061 __poll_t mask; 3062 u8 shutdown; 3063 3064 sock_poll_wait(file, sock, wait); 3065 mask = 0; 3066 shutdown = READ_ONCE(sk->sk_shutdown); 3067 3068 /* exceptional events? */ 3069 if (READ_ONCE(sk->sk_err)) 3070 mask |= EPOLLERR; 3071 if (shutdown == SHUTDOWN_MASK) 3072 mask |= EPOLLHUP; 3073 if (shutdown & RCV_SHUTDOWN) 3074 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3075 3076 /* readable? 
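 * Either skbs sit on sk_receive_queue, or sk_is_readable() reports
 * data held elsewhere (e.g. by a BPF sockmap psock for this family).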
*/ 3077 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3078 mask |= EPOLLIN | EPOLLRDNORM; 3079 if (sk_is_readable(sk)) 3080 mask |= EPOLLIN | EPOLLRDNORM; 3081 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3082 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3083 mask |= EPOLLPRI; 3084 #endif 3085 3086 /* Connection-based need to check for termination and startup */ 3087 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3088 sk->sk_state == TCP_CLOSE) 3089 mask |= EPOLLHUP; 3090 3091 /* 3092 * we set writable also when the other side has shut down the 3093 * connection. This prevents stuck sockets. 3094 */ 3095 if (unix_writable(sk)) 3096 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3097 3098 return mask; 3099 } 3100 3101 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3102 poll_table *wait) 3103 { 3104 struct sock *sk = sock->sk, *other; 3105 unsigned int writable; 3106 __poll_t mask; 3107 u8 shutdown; 3108 3109 sock_poll_wait(file, sock, wait); 3110 mask = 0; 3111 shutdown = READ_ONCE(sk->sk_shutdown); 3112 3113 /* exceptional events? */ 3114 if (READ_ONCE(sk->sk_err) || 3115 !skb_queue_empty_lockless(&sk->sk_error_queue)) 3116 mask |= EPOLLERR | 3117 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3118 3119 if (shutdown & RCV_SHUTDOWN) 3120 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3121 if (shutdown == SHUTDOWN_MASK) 3122 mask |= EPOLLHUP; 3123 3124 /* readable? */ 3125 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3126 mask |= EPOLLIN | EPOLLRDNORM; 3127 if (sk_is_readable(sk)) 3128 mask |= EPOLLIN | EPOLLRDNORM; 3129 3130 /* Connection-based need to check for termination and startup */ 3131 if (sk->sk_type == SOCK_SEQPACKET) { 3132 if (sk->sk_state == TCP_CLOSE) 3133 mask |= EPOLLHUP; 3134 /* connection hasn't started yet? */ 3135 if (sk->sk_state == TCP_SYN_SENT) 3136 return mask; 3137 } 3138 3139 /* No write status requested, avoid expensive OUT tests. 
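 * The writability check below takes unix_state_lock() and may hook
 * into the peer's wait queue, so skip it when the caller did not
 * poll for EPOLLOUT/EPOLLWRNORM/EPOLLWRBAND.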
*/ 3140 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3141 return mask; 3142 3143 writable = unix_writable(sk); 3144 if (writable) { 3145 unix_state_lock(sk); 3146 3147 other = unix_peer(sk); 3148 if (other && unix_peer(other) != sk && 3149 unix_recvq_full_lockless(other) && 3150 unix_dgram_peer_wake_me(sk, other)) 3151 writable = 0; 3152 3153 unix_state_unlock(sk); 3154 } 3155 3156 if (writable) 3157 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3158 else 3159 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3160 3161 return mask; 3162 } 3163 3164 #ifdef CONFIG_PROC_FS 3165 3166 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3167 3168 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3169 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3170 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3171 3172 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3173 { 3174 unsigned long offset = get_offset(*pos); 3175 unsigned long bucket = get_bucket(*pos); 3176 unsigned long count = 0; 3177 struct sock *sk; 3178 3179 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3180 sk; sk = sk_next(sk)) { 3181 if (++count == offset) 3182 break; 3183 } 3184 3185 return sk; 3186 } 3187 3188 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3189 { 3190 unsigned long bucket = get_bucket(*pos); 3191 struct net *net = seq_file_net(seq); 3192 struct sock *sk; 3193 3194 while (bucket < UNIX_HASH_SIZE) { 3195 spin_lock(&net->unx.table.locks[bucket]); 3196 3197 sk = unix_from_bucket(seq, pos); 3198 if (sk) 3199 return sk; 3200 3201 spin_unlock(&net->unx.table.locks[bucket]); 3202 3203 *pos = set_bucket_offset(++bucket, 1); 3204 } 3205 3206 return NULL; 3207 } 3208 3209 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3210 loff_t *pos) 3211 { 3212 unsigned long bucket = get_bucket(*pos); 3213 3214 sk = sk_next(sk); 3215 if (sk) 3216 return sk; 3217 3218 3219 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3220 3221 *pos = set_bucket_offset(++bucket, 1); 3222 3223 return unix_get_first(seq, pos); 3224 } 3225 3226 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3227 { 3228 if (!*pos) 3229 return SEQ_START_TOKEN; 3230 3231 return unix_get_first(seq, pos); 3232 } 3233 3234 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3235 { 3236 ++*pos; 3237 3238 if (v == SEQ_START_TOKEN) 3239 return unix_get_first(seq, pos); 3240 3241 return unix_get_next(seq, v, pos); 3242 } 3243 3244 static void unix_seq_stop(struct seq_file *seq, void *v) 3245 { 3246 struct sock *sk = v; 3247 3248 if (sk) 3249 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3250 } 3251 3252 static int unix_seq_show(struct seq_file *seq, void *v) 3253 { 3254 3255 if (v == SEQ_START_TOKEN) 3256 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3257 "Inode Path\n"); 3258 else { 3259 struct sock *s = v; 3260 struct unix_sock *u = unix_sk(s); 3261 unix_state_lock(s); 3262 3263 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3264 s, 3265 refcount_read(&s->sk_refcnt), 3266 0, 3267 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3268 s->sk_type, 3269 s->sk_socket ? 3270 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3271 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3272 sock_i_ino(s)); 3273 3274 if (u->addr) { // under a hash table lock here 3275 int i, len; 3276 seq_putc(seq, ' '); 3277 3278 i = 0; 3279 len = u->addr->len - 3280 offsetof(struct sockaddr_un, sun_path); 3281 if (u->addr->name->sun_path[0]) { 3282 len--; 3283 } else { 3284 seq_putc(seq, '@'); 3285 i++; 3286 } 3287 for ( ; i < len; i++) 3288 seq_putc(seq, u->addr->name->sun_path[i] ?: 3289 '@'); 3290 } 3291 unix_state_unlock(s); 3292 seq_putc(seq, '\n'); 3293 } 3294 3295 return 0; 3296 } 3297 3298 static const struct seq_operations unix_seq_ops = { 3299 .start = unix_seq_start, 3300 .next = unix_seq_next, 3301 .stop = unix_seq_stop, 3302 .show = unix_seq_show, 3303 }; 3304 3305 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3306 struct bpf_unix_iter_state { 3307 struct seq_net_private p; 3308 unsigned int cur_sk; 3309 unsigned int end_sk; 3310 unsigned int max_sk; 3311 struct sock **batch; 3312 bool st_bucket_done; 3313 }; 3314 3315 struct bpf_iter__unix { 3316 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3317 __bpf_md_ptr(struct unix_sock *, unix_sk); 3318 uid_t uid __aligned(8); 3319 }; 3320 3321 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3322 struct unix_sock *unix_sk, uid_t uid) 3323 { 3324 struct bpf_iter__unix ctx; 3325 3326 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3327 ctx.meta = meta; 3328 ctx.unix_sk = unix_sk; 3329 ctx.uid = uid; 3330 return bpf_iter_run_prog(prog, &ctx); 3331 } 3332 3333 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3334 3335 { 3336 struct bpf_unix_iter_state *iter = seq->private; 3337 unsigned int expected = 1; 3338 struct sock *sk; 3339 3340 sock_hold(start_sk); 3341 iter->batch[iter->end_sk++] = start_sk; 3342 3343 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3344 if (iter->end_sk < iter->max_sk) { 3345 sock_hold(sk); 3346 iter->batch[iter->end_sk++] = sk; 3347 } 3348 3349 expected++; 3350 } 3351 3352 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3353 3354 return expected; 3355 } 3356 3357 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3358 { 3359 while (iter->cur_sk < iter->end_sk) 3360 sock_put(iter->batch[iter->cur_sk++]); 3361 } 3362 3363 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3364 unsigned int new_batch_sz) 3365 { 3366 struct sock **new_batch; 3367 3368 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3369 GFP_USER | __GFP_NOWARN); 3370 if (!new_batch) 3371 return -ENOMEM; 3372 3373 bpf_iter_unix_put_batch(iter); 3374 kvfree(iter->batch); 3375 iter->batch = new_batch; 3376 iter->max_sk = new_batch_sz; 3377 3378 return 0; 3379 } 3380 3381 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3382 loff_t *pos) 3383 { 3384 struct bpf_unix_iter_state *iter = seq->private; 3385 unsigned int expected; 3386 bool resized = false; 3387 struct sock *sk; 3388 3389 if (iter->st_bucket_done) 3390 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3391 3392 again: 3393 /* Get a new batch */ 3394 iter->cur_sk = 0; 3395 iter->end_sk = 0; 3396 3397 sk = unix_get_first(seq, pos); 3398 if (!sk) 3399 return NULL; /* Done */ 3400 3401 expected = bpf_iter_unix_hold_batch(seq, sk); 3402 3403 if (iter->end_sk == expected) { 3404 iter->st_bucket_done = true; 3405 return sk; 3406 } 3407 3408 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3409 resized = true; 3410 goto again; 3411 } 3412 3413 return sk; 3414 } 3415 3416 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3417 { 3418 if (!*pos) 3419 return SEQ_START_TOKEN; 3420 3421 /* bpf iter does not support lseek, so it always 3422 * continue from where it was stop()-ped. 3423 */ 3424 return bpf_iter_unix_batch(seq, pos); 3425 } 3426 3427 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3428 { 3429 struct bpf_unix_iter_state *iter = seq->private; 3430 struct sock *sk; 3431 3432 /* Whenever seq_next() is called, the iter->cur_sk is 3433 * done with seq_show(), so advance to the next sk in 3434 * the batch. 3435 */ 3436 if (iter->cur_sk < iter->end_sk) 3437 sock_put(iter->batch[iter->cur_sk++]); 3438 3439 ++*pos; 3440 3441 if (iter->cur_sk < iter->end_sk) 3442 sk = iter->batch[iter->cur_sk]; 3443 else 3444 sk = bpf_iter_unix_batch(seq, pos); 3445 3446 return sk; 3447 } 3448 3449 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3450 { 3451 struct bpf_iter_meta meta; 3452 struct bpf_prog *prog; 3453 struct sock *sk = v; 3454 uid_t uid; 3455 bool slow; 3456 int ret; 3457 3458 if (v == SEQ_START_TOKEN) 3459 return 0; 3460 3461 slow = lock_sock_fast(sk); 3462 3463 if (unlikely(sk_unhashed(sk))) { 3464 ret = SEQ_SKIP; 3465 goto unlock; 3466 } 3467 3468 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3469 meta.seq = seq; 3470 prog = bpf_iter_get_info(&meta, false); 3471 ret = unix_prog_seq_show(prog, &meta, v, uid); 3472 unlock: 3473 unlock_sock_fast(sk, slow); 3474 return ret; 3475 } 3476 3477 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3478 { 3479 struct bpf_unix_iter_state *iter = seq->private; 3480 struct bpf_iter_meta meta; 3481 struct bpf_prog *prog; 3482 3483 if (!v) { 3484 meta.seq = seq; 3485 prog = bpf_iter_get_info(&meta, true); 3486 if (prog) 3487 (void)unix_prog_seq_show(prog, &meta, v, 0); 3488 } 3489 3490 if (iter->cur_sk < iter->end_sk) 3491 bpf_iter_unix_put_batch(iter); 3492 } 3493 3494 static const struct seq_operations bpf_iter_unix_seq_ops = { 3495 .start = bpf_iter_unix_seq_start, 3496 .next = bpf_iter_unix_seq_next, 3497 .stop = bpf_iter_unix_seq_stop, 3498 .show = bpf_iter_unix_seq_show, 3499 }; 3500 #endif 3501 #endif 3502 3503 static const struct net_proto_family unix_family_ops = { 3504 .family = PF_UNIX, 3505 .create = unix_create, 3506 .owner = THIS_MODULE, 3507 }; 3508 3509 3510 static int __net_init unix_net_init(struct net *net) 3511 { 3512 int i; 3513 3514 net->unx.sysctl_max_dgram_qlen = 10; 3515 if (unix_sysctl_register(net)) 3516 goto out; 3517 3518 #ifdef CONFIG_PROC_FS 3519 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3520 sizeof(struct seq_net_private))) 3521 goto err_sysctl; 3522 #endif 3523 3524 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3525 sizeof(spinlock_t), GFP_KERNEL); 3526 if (!net->unx.table.locks) 3527 goto err_proc; 3528 3529 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3530 sizeof(struct hlist_head), 3531 GFP_KERNEL); 3532 if (!net->unx.table.buckets) 3533 goto free_locks; 3534 3535 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3536 spin_lock_init(&net->unx.table.locks[i]); 3537 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3538 } 3539 3540 return 0; 3541 3542 free_locks: 3543 kvfree(net->unx.table.locks); 3544 err_proc: 3545 #ifdef CONFIG_PROC_FS 3546 remove_proc_entry("unix", net->proc_net); 3547 err_sysctl: 3548 #endif 3549 unix_sysctl_unregister(net); 3550 out: 3551 return -ENOMEM; 3552 } 3553 3554 static void __net_exit unix_net_exit(struct net *net) 3555 { 3556 
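	/* Tear down what unix_net_init() created: the per-netns hash
	 * table, the sysctl table and the /proc/net/unix entry.
	 */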
kvfree(net->unx.table.buckets); 3557 kvfree(net->unx.table.locks); 3558 unix_sysctl_unregister(net); 3559 remove_proc_entry("unix", net->proc_net); 3560 } 3561 3562 static struct pernet_operations unix_net_ops = { 3563 .init = unix_net_init, 3564 .exit = unix_net_exit, 3565 }; 3566 3567 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3568 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3569 struct unix_sock *unix_sk, uid_t uid) 3570 3571 #define INIT_BATCH_SZ 16 3572 3573 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3574 { 3575 struct bpf_unix_iter_state *iter = priv_data; 3576 int err; 3577 3578 err = bpf_iter_init_seq_net(priv_data, aux); 3579 if (err) 3580 return err; 3581 3582 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3583 if (err) { 3584 bpf_iter_fini_seq_net(priv_data); 3585 return err; 3586 } 3587 3588 return 0; 3589 } 3590 3591 static void bpf_iter_fini_unix(void *priv_data) 3592 { 3593 struct bpf_unix_iter_state *iter = priv_data; 3594 3595 bpf_iter_fini_seq_net(priv_data); 3596 kvfree(iter->batch); 3597 } 3598 3599 static const struct bpf_iter_seq_info unix_seq_info = { 3600 .seq_ops = &bpf_iter_unix_seq_ops, 3601 .init_seq_private = bpf_iter_init_unix, 3602 .fini_seq_private = bpf_iter_fini_unix, 3603 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3604 }; 3605 3606 static const struct bpf_func_proto * 3607 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3608 const struct bpf_prog *prog) 3609 { 3610 switch (func_id) { 3611 case BPF_FUNC_setsockopt: 3612 return &bpf_sk_setsockopt_proto; 3613 case BPF_FUNC_getsockopt: 3614 return &bpf_sk_getsockopt_proto; 3615 default: 3616 return NULL; 3617 } 3618 } 3619 3620 static struct bpf_iter_reg unix_reg_info = { 3621 .target = "unix", 3622 .ctx_arg_info_size = 1, 3623 .ctx_arg_info = { 3624 { offsetof(struct bpf_iter__unix, unix_sk), 3625 PTR_TO_BTF_ID_OR_NULL }, 3626 }, 3627 .get_func_proto = bpf_iter_unix_get_func_proto, 3628 .seq_info = &unix_seq_info, 3629 }; 3630 3631 static void __init bpf_iter_register(void) 3632 { 3633 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3634 if (bpf_iter_reg_target(&unix_reg_info)) 3635 pr_warn("Warning: could not register bpf iterator unix\n"); 3636 } 3637 #endif 3638 3639 static int __init af_unix_init(void) 3640 { 3641 int i, rc = -1; 3642 3643 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3644 3645 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3646 spin_lock_init(&bsd_socket_locks[i]); 3647 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3648 } 3649 3650 rc = proto_register(&unix_dgram_proto, 1); 3651 if (rc != 0) { 3652 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3653 goto out; 3654 } 3655 3656 rc = proto_register(&unix_stream_proto, 1); 3657 if (rc != 0) { 3658 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3659 proto_unregister(&unix_dgram_proto); 3660 goto out; 3661 } 3662 3663 sock_register(&unix_family_ops); 3664 register_pernet_subsys(&unix_net_ops); 3665 unix_bpf_build_proto(); 3666 3667 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3668 bpf_iter_register(); 3669 #endif 3670 3671 out: 3672 return rc; 3673 } 3674 3675 static void __exit af_unix_exit(void) 3676 { 3677 sock_unregister(PF_UNIX); 3678 proto_unregister(&unix_dgram_proto); 3679 proto_unregister(&unix_stream_proto); 3680 unregister_pernet_subsys(&unix_net_ops); 3681 } 3682 3683 /* Earlier than 
device_initcall() so that other drivers invoking 3684 request_module() don't end up in a loop when modprobe tries 3685 to use a UNIX socket. But later than subsys_initcall() because 3686 we depend on stuff initialised there */ 3687 fs_initcall(af_unix_init); 3688 module_exit(af_unix_exit); 3689 3690 MODULE_LICENSE("GPL"); 3691 MODULE_ALIAS_NETPROTO(PF_UNIX); 3692