/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *		Christoph Rohland :	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *		Alexey Kuznetsov :	Repaired (I hope) bugs introduced
 *					by above two patches.
 *		Andrea Arcangeli :	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks being hashed (this is for
 *					unix_gc() performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *		Alexey Kuznetsov :	Full scale SMP. Lot of bugs are introduced 8)
 *		Malcolm Beattie	:	Set peercred for socketpair
 *		Michal Ostrowski :	Module initialization cleanup.
 *		Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
81 */ 82 83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 84 85 #include <linux/module.h> 86 #include <linux/kernel.h> 87 #include <linux/signal.h> 88 #include <linux/sched.h> 89 #include <linux/errno.h> 90 #include <linux/string.h> 91 #include <linux/stat.h> 92 #include <linux/dcache.h> 93 #include <linux/namei.h> 94 #include <linux/socket.h> 95 #include <linux/un.h> 96 #include <linux/fcntl.h> 97 #include <linux/termios.h> 98 #include <linux/sockios.h> 99 #include <linux/net.h> 100 #include <linux/in.h> 101 #include <linux/fs.h> 102 #include <linux/slab.h> 103 #include <asm/uaccess.h> 104 #include <linux/skbuff.h> 105 #include <linux/netdevice.h> 106 #include <net/net_namespace.h> 107 #include <net/sock.h> 108 #include <net/tcp_states.h> 109 #include <net/af_unix.h> 110 #include <linux/proc_fs.h> 111 #include <linux/seq_file.h> 112 #include <net/scm.h> 113 #include <linux/init.h> 114 #include <linux/poll.h> 115 #include <linux/rtnetlink.h> 116 #include <linux/mount.h> 117 #include <net/checksum.h> 118 #include <linux/security.h> 119 #include <linux/freezer.h> 120 121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; 122 EXPORT_SYMBOL_GPL(unix_socket_table); 123 DEFINE_SPINLOCK(unix_table_lock); 124 EXPORT_SYMBOL_GPL(unix_table_lock); 125 static atomic_long_t unix_nr_socks; 126 127 128 static struct hlist_head *unix_sockets_unbound(void *addr) 129 { 130 unsigned long hash = (unsigned long)addr; 131 132 hash ^= hash >> 16; 133 hash ^= hash >> 8; 134 hash %= UNIX_HASH_SIZE; 135 return &unix_socket_table[UNIX_HASH_SIZE + hash]; 136 } 137 138 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) 139 140 #ifdef CONFIG_SECURITY_NETWORK 141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 142 { 143 UNIXCB(skb).secid = scm->secid; 144 } 145 146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 147 { 148 scm->secid = UNIXCB(skb).secid; 149 } 150 151 static inline bool unix_secdata_eq(struct 
scm_cookie *scm, struct sk_buff *skb) 152 { 153 return (scm->secid == UNIXCB(skb).secid); 154 } 155 #else 156 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 157 { } 158 159 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 160 { } 161 162 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 163 { 164 return true; 165 } 166 #endif /* CONFIG_SECURITY_NETWORK */ 167 168 /* 169 * SMP locking strategy: 170 * hash table is protected with spinlock unix_table_lock 171 * each socket state is protected by separate spin lock. 172 */ 173 174 static inline unsigned int unix_hash_fold(__wsum n) 175 { 176 unsigned int hash = (__force unsigned int)csum_fold(n); 177 178 hash ^= hash>>8; 179 return hash&(UNIX_HASH_SIZE-1); 180 } 181 182 #define unix_peer(sk) (unix_sk(sk)->peer) 183 184 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 185 { 186 return unix_peer(osk) == sk; 187 } 188 189 static inline int unix_may_send(struct sock *sk, struct sock *osk) 190 { 191 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 192 } 193 194 static inline int unix_recvq_full(struct sock const *sk) 195 { 196 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 197 } 198 199 struct sock *unix_peer_get(struct sock *s) 200 { 201 struct sock *peer; 202 203 unix_state_lock(s); 204 peer = unix_peer(s); 205 if (peer) 206 sock_hold(peer); 207 unix_state_unlock(s); 208 return peer; 209 } 210 EXPORT_SYMBOL_GPL(unix_peer_get); 211 212 static inline void unix_release_addr(struct unix_address *addr) 213 { 214 if (atomic_dec_and_test(&addr->refcnt)) 215 kfree(addr); 216 } 217 218 /* 219 * Check unix socket name: 220 * - should be not zero length. 221 * - if started by not zero, should be NULL terminated (FS object) 222 * - if started by zero, it is abstract name. 
223 */ 224 225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) 226 { 227 if (len <= sizeof(short) || len > sizeof(*sunaddr)) 228 return -EINVAL; 229 if (!sunaddr || sunaddr->sun_family != AF_UNIX) 230 return -EINVAL; 231 if (sunaddr->sun_path[0]) { 232 /* 233 * This may look like an off by one error but it is a bit more 234 * subtle. 108 is the longest valid AF_UNIX path for a binding. 235 * sun_path[108] doesn't as such exist. However in kernel space 236 * we are guaranteed that it is a valid memory location in our 237 * kernel address buffer. 238 */ 239 ((char *)sunaddr)[len] = 0; 240 len = strlen(sunaddr->sun_path)+1+sizeof(short); 241 return len; 242 } 243 244 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); 245 return len; 246 } 247 248 static void __unix_remove_socket(struct sock *sk) 249 { 250 sk_del_node_init(sk); 251 } 252 253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) 254 { 255 WARN_ON(!sk_unhashed(sk)); 256 sk_add_node(sk, list); 257 } 258 259 static inline void unix_remove_socket(struct sock *sk) 260 { 261 spin_lock(&unix_table_lock); 262 __unix_remove_socket(sk); 263 spin_unlock(&unix_table_lock); 264 } 265 266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) 267 { 268 spin_lock(&unix_table_lock); 269 __unix_insert_socket(list, sk); 270 spin_unlock(&unix_table_lock); 271 } 272 273 static struct sock *__unix_find_socket_byname(struct net *net, 274 struct sockaddr_un *sunname, 275 int len, int type, unsigned int hash) 276 { 277 struct sock *s; 278 279 sk_for_each(s, &unix_socket_table[hash ^ type]) { 280 struct unix_sock *u = unix_sk(s); 281 282 if (!net_eq(sock_net(s), net)) 283 continue; 284 285 if (u->addr->len == len && 286 !memcmp(u->addr->name, sunname, len)) 287 goto found; 288 } 289 s = NULL; 290 found: 291 return s; 292 } 293 294 static inline struct sock *unix_find_socket_byname(struct net *net, 295 struct sockaddr_un *sunname, 296 int len, 
int type, 297 unsigned int hash) 298 { 299 struct sock *s; 300 301 spin_lock(&unix_table_lock); 302 s = __unix_find_socket_byname(net, sunname, len, type, hash); 303 if (s) 304 sock_hold(s); 305 spin_unlock(&unix_table_lock); 306 return s; 307 } 308 309 static struct sock *unix_find_socket_byinode(struct inode *i) 310 { 311 struct sock *s; 312 313 spin_lock(&unix_table_lock); 314 sk_for_each(s, 315 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 316 struct dentry *dentry = unix_sk(s)->path.dentry; 317 318 if (dentry && d_backing_inode(dentry) == i) { 319 sock_hold(s); 320 goto found; 321 } 322 } 323 s = NULL; 324 found: 325 spin_unlock(&unix_table_lock); 326 return s; 327 } 328 329 static inline int unix_writable(struct sock *sk) 330 { 331 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 332 } 333 334 static void unix_write_space(struct sock *sk) 335 { 336 struct socket_wq *wq; 337 338 rcu_read_lock(); 339 if (unix_writable(sk)) { 340 wq = rcu_dereference(sk->sk_wq); 341 if (wq_has_sleeper(wq)) 342 wake_up_interruptible_sync_poll(&wq->wait, 343 POLLOUT | POLLWRNORM | POLLWRBAND); 344 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 345 } 346 rcu_read_unlock(); 347 } 348 349 /* When dgram socket disconnects (or changes its peer), we clear its receive 350 * queue of packets arrived from previous peer. First, it allows to do 351 * flow control based only on wmem_alloc; second, sk connected to peer 352 * may receive messages only from that peer. */ 353 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 354 { 355 if (!skb_queue_empty(&sk->sk_receive_queue)) { 356 skb_queue_purge(&sk->sk_receive_queue); 357 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 358 359 /* If one link of bidirectional dgram pipe is disconnected, 360 * we signal error. Messages are lost. Do not make this, 361 * when peer was not connected to us. 
*/
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

/*
 * sk_destruct callback installed by unix_create1().
 *
 * Purges anything left on the receive queue and warns if the socket
 * still has write memory accounted, is still hashed, or is still
 * attached to a struct socket.  If the socket is not flagged SOCK_DEAD
 * the function logs the attempt and returns without releasing
 * anything.  Otherwise it drops the address reference, decrements the
 * global socket count and the per-protocol in-use counter.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	/* The in-use counter update is bracketed by bh disable/enable. */
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

/*
 * Tear down @sk: unhash it, mark it closed, detach it from its peer,
 * flush its receive queue and drop the caller's reference.
 *
 * @embrion is non-zero when @sk is a not-yet-accepted connection being
 * released on behalf of its listener (see the recursive call made while
 * draining a TCP_LISTEN socket's queue); in that case the peer always
 * gets ECONNRESET.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Take over the bound path; it is put after the queue is drained. */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	/* Remember the old state: a listener's queue holds embryo sockets. */
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an embryo) means the peer sees a reset. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket.
Throw out buffers at least */ 438 439 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 440 if (state == TCP_LISTEN) 441 unix_release_sock(skb->sk, 1); 442 /* passed fds are erased in the kfree_skb hook */ 443 kfree_skb(skb); 444 } 445 446 if (path.dentry) 447 path_put(&path); 448 449 sock_put(sk); 450 451 /* ---- Socket is dead now and most probably destroyed ---- */ 452 453 /* 454 * Fixme: BSD difference: In BSD all sockets connected to us get 455 * ECONNRESET and we die on the spot. In Linux we behave 456 * like files and pipes do and wait for the last 457 * dereference. 458 * 459 * Can't we simply set sock->err? 460 * 461 * What the above comment does talk about? --ANK(980817) 462 */ 463 464 if (unix_tot_inflight) 465 unix_gc(); /* Garbage collect fds */ 466 } 467 468 static void init_peercred(struct sock *sk) 469 { 470 put_pid(sk->sk_peer_pid); 471 if (sk->sk_peer_cred) 472 put_cred(sk->sk_peer_cred); 473 sk->sk_peer_pid = get_pid(task_tgid(current)); 474 sk->sk_peer_cred = get_current_cred(); 475 } 476 477 static void copy_peercred(struct sock *sk, struct sock *peersk) 478 { 479 put_pid(sk->sk_peer_pid); 480 if (sk->sk_peer_cred) 481 put_cred(sk->sk_peer_cred); 482 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 483 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 484 } 485 486 static int unix_listen(struct socket *sock, int backlog) 487 { 488 int err; 489 struct sock *sk = sock->sk; 490 struct unix_sock *u = unix_sk(sk); 491 struct pid *old_pid = NULL; 492 493 err = -EOPNOTSUPP; 494 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 495 goto out; /* Only stream/seqpacket sockets accept */ 496 err = -EINVAL; 497 if (!u->addr) 498 goto out; /* No listens on an unbound socket */ 499 unix_state_lock(sk); 500 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 501 goto out_unlock; 502 if (backlog > sk->sk_max_ack_backlog) 503 wake_up_interruptible_all(&u->peer_wait); 504 sk->sk_max_ack_backlog = backlog; 505 sk->sk_state = 
TCP_LISTEN; 506 /* set credentials so connect can copy them */ 507 init_peercred(sk); 508 err = 0; 509 510 out_unlock: 511 unix_state_unlock(sk); 512 put_pid(old_pid); 513 out: 514 return err; 515 } 516 517 static int unix_release(struct socket *); 518 static int unix_bind(struct socket *, struct sockaddr *, int); 519 static int unix_stream_connect(struct socket *, struct sockaddr *, 520 int addr_len, int flags); 521 static int unix_socketpair(struct socket *, struct socket *); 522 static int unix_accept(struct socket *, struct socket *, int); 523 static int unix_getname(struct socket *, struct sockaddr *, int *, int); 524 static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 525 static unsigned int unix_dgram_poll(struct file *, struct socket *, 526 poll_table *); 527 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 528 static int unix_shutdown(struct socket *, int); 529 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 530 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 531 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 532 size_t size, int flags); 533 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 534 struct pipe_inode_info *, size_t size, 535 unsigned int flags); 536 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 537 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 538 static int unix_dgram_connect(struct socket *, struct sockaddr *, 539 int, int); 540 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 541 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 542 int); 543 544 static int unix_set_peek_off(struct sock *sk, int val) 545 { 546 struct unix_sock *u = unix_sk(sk); 547 548 if (mutex_lock_interruptible(&u->readlock)) 549 return -EINTR; 550 551 sk->sk_peek_off = val; 552 mutex_unlock(&u->readlock); 
553 554 return 0; 555 } 556 557 558 static const struct proto_ops unix_stream_ops = { 559 .family = PF_UNIX, 560 .owner = THIS_MODULE, 561 .release = unix_release, 562 .bind = unix_bind, 563 .connect = unix_stream_connect, 564 .socketpair = unix_socketpair, 565 .accept = unix_accept, 566 .getname = unix_getname, 567 .poll = unix_poll, 568 .ioctl = unix_ioctl, 569 .listen = unix_listen, 570 .shutdown = unix_shutdown, 571 .setsockopt = sock_no_setsockopt, 572 .getsockopt = sock_no_getsockopt, 573 .sendmsg = unix_stream_sendmsg, 574 .recvmsg = unix_stream_recvmsg, 575 .mmap = sock_no_mmap, 576 .sendpage = unix_stream_sendpage, 577 .splice_read = unix_stream_splice_read, 578 .set_peek_off = unix_set_peek_off, 579 }; 580 581 static const struct proto_ops unix_dgram_ops = { 582 .family = PF_UNIX, 583 .owner = THIS_MODULE, 584 .release = unix_release, 585 .bind = unix_bind, 586 .connect = unix_dgram_connect, 587 .socketpair = unix_socketpair, 588 .accept = sock_no_accept, 589 .getname = unix_getname, 590 .poll = unix_dgram_poll, 591 .ioctl = unix_ioctl, 592 .listen = sock_no_listen, 593 .shutdown = unix_shutdown, 594 .setsockopt = sock_no_setsockopt, 595 .getsockopt = sock_no_getsockopt, 596 .sendmsg = unix_dgram_sendmsg, 597 .recvmsg = unix_dgram_recvmsg, 598 .mmap = sock_no_mmap, 599 .sendpage = sock_no_sendpage, 600 .set_peek_off = unix_set_peek_off, 601 }; 602 603 static const struct proto_ops unix_seqpacket_ops = { 604 .family = PF_UNIX, 605 .owner = THIS_MODULE, 606 .release = unix_release, 607 .bind = unix_bind, 608 .connect = unix_stream_connect, 609 .socketpair = unix_socketpair, 610 .accept = unix_accept, 611 .getname = unix_getname, 612 .poll = unix_dgram_poll, 613 .ioctl = unix_ioctl, 614 .listen = unix_listen, 615 .shutdown = unix_shutdown, 616 .setsockopt = sock_no_setsockopt, 617 .getsockopt = sock_no_getsockopt, 618 .sendmsg = unix_seqpacket_sendmsg, 619 .recvmsg = unix_seqpacket_recvmsg, 620 .mmap = sock_no_mmap, 621 .sendpage = sock_no_sendpage, 622 
.set_peek_off = unix_set_peek_off, 623 }; 624 625 static struct proto unix_proto = { 626 .name = "UNIX", 627 .owner = THIS_MODULE, 628 .obj_size = sizeof(struct unix_sock), 629 }; 630 631 /* 632 * AF_UNIX sockets do not interact with hardware, hence they 633 * dont trigger interrupts - so it's safe for them to have 634 * bh-unsafe locking for their sk_receive_queue.lock. Split off 635 * this special lock-class by reinitializing the spinlock key: 636 */ 637 static struct lock_class_key af_unix_sk_receive_queue_lock_key; 638 639 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) 640 { 641 struct sock *sk = NULL; 642 struct unix_sock *u; 643 644 atomic_long_inc(&unix_nr_socks); 645 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) 646 goto out; 647 648 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); 649 if (!sk) 650 goto out; 651 652 sock_init_data(sock, sk); 653 lockdep_set_class(&sk->sk_receive_queue.lock, 654 &af_unix_sk_receive_queue_lock_key); 655 656 sk->sk_write_space = unix_write_space; 657 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 658 sk->sk_destruct = unix_sock_destructor; 659 u = unix_sk(sk); 660 u->path.dentry = NULL; 661 u->path.mnt = NULL; 662 spin_lock_init(&u->lock); 663 atomic_long_set(&u->inflight, 0); 664 INIT_LIST_HEAD(&u->link); 665 mutex_init(&u->readlock); /* single task reading lock */ 666 init_waitqueue_head(&u->peer_wait); 667 unix_insert_socket(unix_sockets_unbound(sk), sk); 668 out: 669 if (sk == NULL) 670 atomic_long_dec(&unix_nr_socks); 671 else { 672 local_bh_disable(); 673 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 674 local_bh_enable(); 675 } 676 return sk; 677 } 678 679 static int unix_create(struct net *net, struct socket *sock, int protocol, 680 int kern) 681 { 682 if (protocol && protocol != PF_UNIX) 683 return -EPROTONOSUPPORT; 684 685 sock->state = SS_UNCONNECTED; 686 687 switch (sock->type) { 688 case SOCK_STREAM: 689 sock->ops = &unix_stream_ops; 690 
break; 691 /* 692 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 693 * nothing uses it. 694 */ 695 case SOCK_RAW: 696 sock->type = SOCK_DGRAM; 697 case SOCK_DGRAM: 698 sock->ops = &unix_dgram_ops; 699 break; 700 case SOCK_SEQPACKET: 701 sock->ops = &unix_seqpacket_ops; 702 break; 703 default: 704 return -ESOCKTNOSUPPORT; 705 } 706 707 return unix_create1(net, sock, kern) ? 0 : -ENOMEM; 708 } 709 710 static int unix_release(struct socket *sock) 711 { 712 struct sock *sk = sock->sk; 713 714 if (!sk) 715 return 0; 716 717 unix_release_sock(sk, 0); 718 sock->sk = NULL; 719 720 return 0; 721 } 722 723 static int unix_autobind(struct socket *sock) 724 { 725 struct sock *sk = sock->sk; 726 struct net *net = sock_net(sk); 727 struct unix_sock *u = unix_sk(sk); 728 static u32 ordernum = 1; 729 struct unix_address *addr; 730 int err; 731 unsigned int retries = 0; 732 733 err = mutex_lock_interruptible(&u->readlock); 734 if (err) 735 return err; 736 737 err = 0; 738 if (u->addr) 739 goto out; 740 741 err = -ENOMEM; 742 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); 743 if (!addr) 744 goto out; 745 746 addr->name->sun_family = AF_UNIX; 747 atomic_set(&addr->refcnt, 1); 748 749 retry: 750 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 751 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); 752 753 spin_lock(&unix_table_lock); 754 ordernum = (ordernum+1)&0xFFFFF; 755 756 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, 757 addr->hash)) { 758 spin_unlock(&unix_table_lock); 759 /* 760 * __unix_find_socket_byname() may take long time if many names 761 * are already in use. 762 */ 763 cond_resched(); 764 /* Give up if all names seems to be in use. 
*/
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	/* Name is free: mix the socket type into the hash, as lookups do. */
	addr->hash ^= sk->sk_type;

	/* Move the socket from its unbound chain to the chain for its name. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

/*
 * Find the socket addressed by @sunname.
 *
 * A filesystem name (sun_path[0] != 0) is resolved with kern_path();
 * the caller needs MAY_WRITE permission on the inode, which must be a
 * socket inode with a unix socket bound to it.  An abstract name is
 * looked up in the hash table using @hash and @type.
 *
 * Returns the socket with a reference held (taken by the lookup
 * helpers), or NULL with *@error set: the kern_path() or
 * inode_permission() error, -ECONNREFUSED when nothing is bound there,
 * or -EPROTOTYPE when the bound socket's type differs from @type.
 * *@error is written only on failure.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		/* Only a type-matching lookup counts as an access of the path. */
		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

/*
 * Create the filesystem node for a socket bound to @sun_path with mode
 * @mode.  On success *@res holds new references on the mount and on the
 * created dentry.  Returns 0 or a negative errno from path creation,
 * the security hook or vfs_mknod().
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
*/
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			/* Hand the caller its own references on mount and dentry. */
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

/*
 * bind() for all unix socket types.
 *
 * An address consisting of the family only (addr_len == sizeof(short))
 * requests an autobind.  Otherwise the name is validated by
 * unix_mkname() and then either a filesystem node is created
 * (sun_path[0] != 0) or the abstract name is checked for uniqueness,
 * and the socket is rehashed under its new address.
 *
 * Returns 0 on success, -EINVAL for a wrong family, a bad name or an
 * already bound socket, -ENOMEM on allocation failure, -EADDRINUSE
 * when the name is taken, or an error from the mutex wait or from
 * unix_mknod().
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	/* On success err is the normalised address length. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	/*
	 * NOTE(review): unix_mkname() stores *hashp only for abstract names,
	 * so for a filesystem name this reads a value that was never set;
	 * addr->hash is overwritten with UNIX_HASH_SIZE in that branch below.
	 */
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			/* An existing node means the address is taken. */
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		/* Filesystem sockets are hashed by inode number, not by name. */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	/* Move the socket from its current chain to the chain for its name. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 * Take the state locks of two sockets.  When @sk2 is NULL or the same
 * socket as @sk1 only @sk1 is locked; otherwise the socket at the lower
 * address is locked first so every caller uses the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

/* Release the lock(s) taken by unix_state_double_lock() on the same pair. */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

/*
 * connect() for datagram sockets: set, replace or (with an AF_UNSPEC
 * address) clear the socket's peer.
 *
 * Returns 0 on success, a negative errno from name validation, autobind
 * or peer lookup, -EPERM when the target is connected to someone else,
 * or the error from the security hook.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* With SOCK_PASSCRED set, an unbound socket is autobound first. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry.
*/
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		/* Changing peers discards what the old peer had queued to us. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

/*
 * Sleep on @other's peer_wait queue for up to @timeo jiffies while its
 * receive queue is full.
 *
 * This function releases @other's state lock without taking it, so the
 * caller must hold that lock on entry.  The sleep is skipped when
 * @other is dead, shut down for receiving, or no longer full.  Returns
 * the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	/* Queue ourselves before testing the condition and dropping the lock. */
	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

/*
 * connect() for stream and seqpacket sockets.
 *
 * Validates the address, autobinds the caller when SOCK_PASSCRED is set
 * and it has no address, and allocates the server-side sock and the skb
 * that announces it before any state lock is taken.  It then looks up
 * the listener, waiting (subject to the send timeout) while the
 * listener's queue is full.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	/* A zero timeout (O_NONBLOCK) turns a full backlog into -EAGAIN below. */
	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* unix_wait_for_peer() drops other's state lock. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		/* Drop the lookup reference and search again from scratch. */
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state.
other is TCP_LISTEN, if sk is TCP_LISTEN we 1159 check this before attempt to grab lock. 1160 1161 Well, and we have to recheck the state after socket locked. 1162 */ 1163 st = sk->sk_state; 1164 1165 switch (st) { 1166 case TCP_CLOSE: 1167 /* This is ok... continue with connect */ 1168 break; 1169 case TCP_ESTABLISHED: 1170 /* Socket is already connected */ 1171 err = -EISCONN; 1172 goto out_unlock; 1173 default: 1174 err = -EINVAL; 1175 goto out_unlock; 1176 } 1177 1178 unix_state_lock_nested(sk); 1179 1180 if (sk->sk_state != st) { 1181 unix_state_unlock(sk); 1182 unix_state_unlock(other); 1183 sock_put(other); 1184 goto restart; 1185 } 1186 1187 err = security_unix_stream_connect(sk, other, newsk); 1188 if (err) { 1189 unix_state_unlock(sk); 1190 goto out_unlock; 1191 } 1192 1193 /* The way is open! Fastly set all the necessary fields... */ 1194 1195 sock_hold(sk); 1196 unix_peer(newsk) = sk; 1197 newsk->sk_state = TCP_ESTABLISHED; 1198 newsk->sk_type = sk->sk_type; 1199 init_peercred(newsk); 1200 newu = unix_sk(newsk); 1201 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1202 otheru = unix_sk(other); 1203 1204 /* copy address information from listening to new sock*/ 1205 if (otheru->addr) { 1206 atomic_inc(&otheru->addr->refcnt); 1207 newu->addr = otheru->addr; 1208 } 1209 if (otheru->path.dentry) { 1210 path_get(&otheru->path); 1211 newu->path = otheru->path; 1212 } 1213 1214 /* Set credentials */ 1215 copy_peercred(sk, other); 1216 1217 sock->state = SS_CONNECTED; 1218 sk->sk_state = TCP_ESTABLISHED; 1219 sock_hold(newsk); 1220 1221 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1222 unix_peer(sk) = newsk; 1223 1224 unix_state_unlock(sk); 1225 1226 /* take ten and and send info to listening sock */ 1227 spin_lock(&other->sk_receive_queue.lock); 1228 __skb_queue_tail(&other->sk_receive_queue, skb); 1229 spin_unlock(&other->sk_receive_queue.lock); 1230 unix_state_unlock(other); 1231 other->sk_data_ready(other); 1232 sock_put(other); 1233 return 
0; 1234 1235 out_unlock: 1236 if (other) 1237 unix_state_unlock(other); 1238 1239 out: 1240 kfree_skb(skb); 1241 if (newsk) 1242 unix_release_sock(newsk, 0); 1243 if (other) 1244 sock_put(other); 1245 return err; 1246 } 1247 1248 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1249 { 1250 struct sock *ska = socka->sk, *skb = sockb->sk; 1251 1252 /* Join our sockets back to back */ 1253 sock_hold(ska); 1254 sock_hold(skb); 1255 unix_peer(ska) = skb; 1256 unix_peer(skb) = ska; 1257 init_peercred(ska); 1258 init_peercred(skb); 1259 1260 if (ska->sk_type != SOCK_DGRAM) { 1261 ska->sk_state = TCP_ESTABLISHED; 1262 skb->sk_state = TCP_ESTABLISHED; 1263 socka->state = SS_CONNECTED; 1264 sockb->state = SS_CONNECTED; 1265 } 1266 return 0; 1267 } 1268 1269 static void unix_sock_inherit_flags(const struct socket *old, 1270 struct socket *new) 1271 { 1272 if (test_bit(SOCK_PASSCRED, &old->flags)) 1273 set_bit(SOCK_PASSCRED, &new->flags); 1274 if (test_bit(SOCK_PASSSEC, &old->flags)) 1275 set_bit(SOCK_PASSSEC, &new->flags); 1276 } 1277 1278 static int unix_accept(struct socket *sock, struct socket *newsock, int flags) 1279 { 1280 struct sock *sk = sock->sk; 1281 struct sock *tsk; 1282 struct sk_buff *skb; 1283 int err; 1284 1285 err = -EOPNOTSUPP; 1286 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1287 goto out; 1288 1289 err = -EINVAL; 1290 if (sk->sk_state != TCP_LISTEN) 1291 goto out; 1292 1293 /* If socket state is TCP_LISTEN it cannot change (for now...), 1294 * so that no locks are necessary. 1295 */ 1296 1297 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); 1298 if (!skb) { 1299 /* This means receive shutdown. 
*/ 1300 if (err == 0) 1301 err = -EINVAL; 1302 goto out; 1303 } 1304 1305 tsk = skb->sk; 1306 skb_free_datagram(sk, skb); 1307 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1308 1309 /* attach accepted sock to socket */ 1310 unix_state_lock(tsk); 1311 newsock->state = SS_CONNECTED; 1312 unix_sock_inherit_flags(sock, newsock); 1313 sock_graft(tsk, newsock); 1314 unix_state_unlock(tsk); 1315 return 0; 1316 1317 out: 1318 return err; 1319 } 1320 1321 1322 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) 1323 { 1324 struct sock *sk = sock->sk; 1325 struct unix_sock *u; 1326 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1327 int err = 0; 1328 1329 if (peer) { 1330 sk = unix_peer_get(sk); 1331 1332 err = -ENOTCONN; 1333 if (!sk) 1334 goto out; 1335 err = 0; 1336 } else { 1337 sock_hold(sk); 1338 } 1339 1340 u = unix_sk(sk); 1341 unix_state_lock(sk); 1342 if (!u->addr) { 1343 sunaddr->sun_family = AF_UNIX; 1344 sunaddr->sun_path[0] = 0; 1345 *uaddr_len = sizeof(short); 1346 } else { 1347 struct unix_address *addr = u->addr; 1348 1349 *uaddr_len = addr->len; 1350 memcpy(sunaddr, addr->name, *uaddr_len); 1351 } 1352 unix_state_unlock(sk); 1353 sock_put(sk); 1354 out: 1355 return err; 1356 } 1357 1358 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1359 { 1360 int i; 1361 1362 scm->fp = UNIXCB(skb).fp; 1363 UNIXCB(skb).fp = NULL; 1364 1365 for (i = scm->fp->count-1; i >= 0; i--) 1366 unix_notinflight(scm->fp->fp[i]); 1367 } 1368 1369 static void unix_destruct_scm(struct sk_buff *skb) 1370 { 1371 struct scm_cookie scm; 1372 memset(&scm, 0, sizeof(scm)); 1373 scm.pid = UNIXCB(skb).pid; 1374 if (UNIXCB(skb).fp) 1375 unix_detach_fds(&scm, skb); 1376 1377 /* Alas, it calls VFS */ 1378 /* So fscking what? 
fput() had been SMP-safe since the last Summer */ 1379 scm_destroy(&scm); 1380 sock_wfree(skb); 1381 } 1382 1383 #define MAX_RECURSION_LEVEL 4 1384 1385 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1386 { 1387 int i; 1388 unsigned char max_level = 0; 1389 int unix_sock_count = 0; 1390 1391 for (i = scm->fp->count - 1; i >= 0; i--) { 1392 struct sock *sk = unix_get_socket(scm->fp->fp[i]); 1393 1394 if (sk) { 1395 unix_sock_count++; 1396 max_level = max(max_level, 1397 unix_sk(sk)->recursion_level); 1398 } 1399 } 1400 if (unlikely(max_level > MAX_RECURSION_LEVEL)) 1401 return -ETOOMANYREFS; 1402 1403 /* 1404 * Need to duplicate file references for the sake of garbage 1405 * collection. Otherwise a socket in the fps might become a 1406 * candidate for GC while the skb is not yet queued. 1407 */ 1408 UNIXCB(skb).fp = scm_fp_dup(scm->fp); 1409 if (!UNIXCB(skb).fp) 1410 return -ENOMEM; 1411 1412 if (unix_sock_count) { 1413 for (i = scm->fp->count - 1; i >= 0; i--) 1414 unix_inflight(scm->fp->fp[i]); 1415 } 1416 return max_level; 1417 } 1418 1419 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1420 { 1421 int err = 0; 1422 1423 UNIXCB(skb).pid = get_pid(scm->pid); 1424 UNIXCB(skb).uid = scm->creds.uid; 1425 UNIXCB(skb).gid = scm->creds.gid; 1426 UNIXCB(skb).fp = NULL; 1427 unix_get_secdata(scm, skb); 1428 if (scm->fp && send_fds) 1429 err = unix_attach_fds(scm, skb); 1430 1431 skb->destructor = unix_destruct_scm; 1432 return err; 1433 } 1434 1435 /* 1436 * Some apps rely on write() giving SCM_CREDENTIALS 1437 * We include credentials if source or destination socket 1438 * asserted SOCK_PASSCRED. 
1439 */ 1440 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1441 const struct sock *other) 1442 { 1443 if (UNIXCB(skb).pid) 1444 return; 1445 if (test_bit(SOCK_PASSCRED, &sock->flags) || 1446 !other->sk_socket || 1447 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { 1448 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1449 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1450 } 1451 } 1452 1453 /* 1454 * Send AF_UNIX data. 1455 */ 1456 1457 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1458 size_t len) 1459 { 1460 struct sock *sk = sock->sk; 1461 struct net *net = sock_net(sk); 1462 struct unix_sock *u = unix_sk(sk); 1463 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1464 struct sock *other = NULL; 1465 int namelen = 0; /* fake GCC */ 1466 int err; 1467 unsigned int hash; 1468 struct sk_buff *skb; 1469 long timeo; 1470 struct scm_cookie scm; 1471 int max_level; 1472 int data_len = 0; 1473 1474 wait_for_unix_gc(); 1475 err = scm_send(sock, msg, &scm, false); 1476 if (err < 0) 1477 return err; 1478 1479 err = -EOPNOTSUPP; 1480 if (msg->msg_flags&MSG_OOB) 1481 goto out; 1482 1483 if (msg->msg_namelen) { 1484 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1485 if (err < 0) 1486 goto out; 1487 namelen = err; 1488 } else { 1489 sunaddr = NULL; 1490 err = -ENOTCONN; 1491 other = unix_peer_get(sk); 1492 if (!other) 1493 goto out; 1494 } 1495 1496 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr 1497 && (err = unix_autobind(sock)) != 0) 1498 goto out; 1499 1500 err = -EMSGSIZE; 1501 if (len > sk->sk_sndbuf - 32) 1502 goto out; 1503 1504 if (len > SKB_MAX_ALLOC) { 1505 data_len = min_t(size_t, 1506 len - SKB_MAX_ALLOC, 1507 MAX_SKB_FRAGS * PAGE_SIZE); 1508 data_len = PAGE_ALIGN(data_len); 1509 1510 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1511 } 1512 1513 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1514 msg->msg_flags & MSG_DONTWAIT, &err, 1515 PAGE_ALLOC_COSTLY_ORDER); 
1516 if (skb == NULL) 1517 goto out; 1518 1519 err = unix_scm_to_skb(&scm, skb, true); 1520 if (err < 0) 1521 goto out_free; 1522 max_level = err + 1; 1523 1524 skb_put(skb, len - data_len); 1525 skb->data_len = data_len; 1526 skb->len = len; 1527 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1528 if (err) 1529 goto out_free; 1530 1531 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1532 1533 restart: 1534 if (!other) { 1535 err = -ECONNRESET; 1536 if (sunaddr == NULL) 1537 goto out_free; 1538 1539 other = unix_find_other(net, sunaddr, namelen, sk->sk_type, 1540 hash, &err); 1541 if (other == NULL) 1542 goto out_free; 1543 } 1544 1545 if (sk_filter(other, skb) < 0) { 1546 /* Toss the packet but do not return any error to the sender */ 1547 err = len; 1548 goto out_free; 1549 } 1550 1551 unix_state_lock(other); 1552 err = -EPERM; 1553 if (!unix_may_send(sk, other)) 1554 goto out_unlock; 1555 1556 if (sock_flag(other, SOCK_DEAD)) { 1557 /* 1558 * Check with 1003.1g - what should 1559 * datagram error 1560 */ 1561 unix_state_unlock(other); 1562 sock_put(other); 1563 1564 err = 0; 1565 unix_state_lock(sk); 1566 if (unix_peer(sk) == other) { 1567 unix_peer(sk) = NULL; 1568 unix_state_unlock(sk); 1569 1570 unix_dgram_disconnected(sk, other); 1571 sock_put(other); 1572 err = -ECONNREFUSED; 1573 } else { 1574 unix_state_unlock(sk); 1575 } 1576 1577 other = NULL; 1578 if (err) 1579 goto out_free; 1580 goto restart; 1581 } 1582 1583 err = -EPIPE; 1584 if (other->sk_shutdown & RCV_SHUTDOWN) 1585 goto out_unlock; 1586 1587 if (sk->sk_type != SOCK_SEQPACKET) { 1588 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1589 if (err) 1590 goto out_unlock; 1591 } 1592 1593 if (unix_peer(other) != sk && unix_recvq_full(other)) { 1594 if (!timeo) { 1595 err = -EAGAIN; 1596 goto out_unlock; 1597 } 1598 1599 timeo = unix_wait_for_peer(other, timeo); 1600 1601 err = sock_intr_errno(timeo); 1602 if (signal_pending(current)) 1603 goto out_free; 1604 
1605 goto restart; 1606 } 1607 1608 if (sock_flag(other, SOCK_RCVTSTAMP)) 1609 __net_timestamp(skb); 1610 maybe_add_creds(skb, sock, other); 1611 skb_queue_tail(&other->sk_receive_queue, skb); 1612 if (max_level > unix_sk(other)->recursion_level) 1613 unix_sk(other)->recursion_level = max_level; 1614 unix_state_unlock(other); 1615 other->sk_data_ready(other); 1616 sock_put(other); 1617 scm_destroy(&scm); 1618 return len; 1619 1620 out_unlock: 1621 unix_state_unlock(other); 1622 out_free: 1623 kfree_skb(skb); 1624 out: 1625 if (other) 1626 sock_put(other); 1627 scm_destroy(&scm); 1628 return err; 1629 } 1630 1631 /* We use paged skbs for stream sockets, and limit occupancy to 32768 1632 * bytes, and a minimun of a full page. 1633 */ 1634 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 1635 1636 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 1637 size_t len) 1638 { 1639 struct sock *sk = sock->sk; 1640 struct sock *other = NULL; 1641 int err, size; 1642 struct sk_buff *skb; 1643 int sent = 0; 1644 struct scm_cookie scm; 1645 bool fds_sent = false; 1646 int max_level; 1647 int data_len; 1648 1649 wait_for_unix_gc(); 1650 err = scm_send(sock, msg, &scm, false); 1651 if (err < 0) 1652 return err; 1653 1654 err = -EOPNOTSUPP; 1655 if (msg->msg_flags&MSG_OOB) 1656 goto out_err; 1657 1658 if (msg->msg_namelen) { 1659 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 1660 goto out_err; 1661 } else { 1662 err = -ENOTCONN; 1663 other = unix_peer(sk); 1664 if (!other) 1665 goto out_err; 1666 } 1667 1668 if (sk->sk_shutdown & SEND_SHUTDOWN) 1669 goto pipe_err; 1670 1671 while (sent < len) { 1672 size = len - sent; 1673 1674 /* Keep two messages in the pipe so it schedules better */ 1675 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 1676 1677 /* allow fallback to order-0 allocations */ 1678 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 1679 1680 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 1681 1682 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 1683 1684 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 1685 msg->msg_flags & MSG_DONTWAIT, &err, 1686 get_order(UNIX_SKB_FRAGS_SZ)); 1687 if (!skb) 1688 goto out_err; 1689 1690 /* Only send the fds in the first buffer */ 1691 err = unix_scm_to_skb(&scm, skb, !fds_sent); 1692 if (err < 0) { 1693 kfree_skb(skb); 1694 goto out_err; 1695 } 1696 max_level = err + 1; 1697 fds_sent = true; 1698 1699 skb_put(skb, size - data_len); 1700 skb->data_len = data_len; 1701 skb->len = size; 1702 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 1703 if (err) { 1704 kfree_skb(skb); 1705 goto out_err; 1706 } 1707 1708 unix_state_lock(other); 1709 1710 if (sock_flag(other, SOCK_DEAD) || 1711 (other->sk_shutdown & RCV_SHUTDOWN)) 1712 goto pipe_err_free; 1713 1714 maybe_add_creds(skb, sock, other); 1715 skb_queue_tail(&other->sk_receive_queue, skb); 1716 if (max_level > unix_sk(other)->recursion_level) 1717 unix_sk(other)->recursion_level = max_level; 1718 unix_state_unlock(other); 1719 other->sk_data_ready(other); 1720 sent += size; 1721 } 1722 1723 scm_destroy(&scm); 1724 1725 return sent; 1726 1727 pipe_err_free: 1728 unix_state_unlock(other); 1729 kfree_skb(skb); 1730 pipe_err: 1731 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 1732 send_sig(SIGPIPE, current, 0); 1733 err = -EPIPE; 1734 out_err: 1735 scm_destroy(&scm); 
1736 return sent ? : err; 1737 } 1738 1739 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 1740 int offset, size_t size, int flags) 1741 { 1742 int err = 0; 1743 bool send_sigpipe = true; 1744 struct sock *other, *sk = socket->sk; 1745 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 1746 1747 if (flags & MSG_OOB) 1748 return -EOPNOTSUPP; 1749 1750 other = unix_peer(sk); 1751 if (!other || sk->sk_state != TCP_ESTABLISHED) 1752 return -ENOTCONN; 1753 1754 if (false) { 1755 alloc_skb: 1756 unix_state_unlock(other); 1757 mutex_unlock(&unix_sk(other)->readlock); 1758 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 1759 &err, 0); 1760 if (!newskb) 1761 return err; 1762 } 1763 1764 /* we must acquire readlock as we modify already present 1765 * skbs in the sk_receive_queue and mess with skb->len 1766 */ 1767 err = mutex_lock_interruptible(&unix_sk(other)->readlock); 1768 if (err) { 1769 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; 1770 send_sigpipe = false; 1771 goto err; 1772 } 1773 1774 if (sk->sk_shutdown & SEND_SHUTDOWN) { 1775 err = -EPIPE; 1776 goto err_unlock; 1777 } 1778 1779 unix_state_lock(other); 1780 1781 if (sock_flag(other, SOCK_DEAD) || 1782 other->sk_shutdown & RCV_SHUTDOWN) { 1783 err = -EPIPE; 1784 goto err_state_unlock; 1785 } 1786 1787 skb = skb_peek_tail(&other->sk_receive_queue); 1788 if (tail && tail == skb) { 1789 skb = newskb; 1790 } else if (!skb) { 1791 if (newskb) 1792 skb = newskb; 1793 else 1794 goto alloc_skb; 1795 } else if (newskb) { 1796 /* this is fast path, we don't necessarily need to 1797 * call to kfree_skb even though with newskb == NULL 1798 * this - does no harm 1799 */ 1800 consume_skb(newskb); 1801 } 1802 1803 if (skb_append_pagefrags(skb, page, offset, size)) { 1804 tail = skb; 1805 goto alloc_skb; 1806 } 1807 1808 skb->len += size; 1809 skb->data_len += size; 1810 skb->truesize += size; 1811 atomic_add(size, &sk->sk_wmem_alloc); 1812 1813 if (newskb) 1814 
__skb_queue_tail(&other->sk_receive_queue, newskb); 1815 1816 unix_state_unlock(other); 1817 mutex_unlock(&unix_sk(other)->readlock); 1818 1819 other->sk_data_ready(other); 1820 1821 return size; 1822 1823 err_state_unlock: 1824 unix_state_unlock(other); 1825 err_unlock: 1826 mutex_unlock(&unix_sk(other)->readlock); 1827 err: 1828 kfree_skb(newskb); 1829 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 1830 send_sig(SIGPIPE, current, 0); 1831 return err; 1832 } 1833 1834 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 1835 size_t len) 1836 { 1837 int err; 1838 struct sock *sk = sock->sk; 1839 1840 err = sock_error(sk); 1841 if (err) 1842 return err; 1843 1844 if (sk->sk_state != TCP_ESTABLISHED) 1845 return -ENOTCONN; 1846 1847 if (msg->msg_namelen) 1848 msg->msg_namelen = 0; 1849 1850 return unix_dgram_sendmsg(sock, msg, len); 1851 } 1852 1853 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 1854 size_t size, int flags) 1855 { 1856 struct sock *sk = sock->sk; 1857 1858 if (sk->sk_state != TCP_ESTABLISHED) 1859 return -ENOTCONN; 1860 1861 return unix_dgram_recvmsg(sock, msg, size, flags); 1862 } 1863 1864 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 1865 { 1866 struct unix_sock *u = unix_sk(sk); 1867 1868 if (u->addr) { 1869 msg->msg_namelen = u->addr->len; 1870 memcpy(msg->msg_name, u->addr->name, u->addr->len); 1871 } 1872 } 1873 1874 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, 1875 size_t size, int flags) 1876 { 1877 struct scm_cookie scm; 1878 struct sock *sk = sock->sk; 1879 struct unix_sock *u = unix_sk(sk); 1880 int noblock = flags & MSG_DONTWAIT; 1881 struct sk_buff *skb; 1882 int err; 1883 int peeked, skip; 1884 1885 err = -EOPNOTSUPP; 1886 if (flags&MSG_OOB) 1887 goto out; 1888 1889 err = mutex_lock_interruptible(&u->readlock); 1890 if (unlikely(err)) { 1891 /* recvmsg() in non blocking mode is supposed to return -EAGAIN 1892 * sk_rcvtimeo is not honored by 
mutex_lock_interruptible() 1893 */ 1894 err = noblock ? -EAGAIN : -ERESTARTSYS; 1895 goto out; 1896 } 1897 1898 skip = sk_peek_offset(sk, flags); 1899 1900 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); 1901 if (!skb) { 1902 unix_state_lock(sk); 1903 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 1904 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 1905 (sk->sk_shutdown & RCV_SHUTDOWN)) 1906 err = 0; 1907 unix_state_unlock(sk); 1908 goto out_unlock; 1909 } 1910 1911 wake_up_interruptible_sync_poll(&u->peer_wait, 1912 POLLOUT | POLLWRNORM | POLLWRBAND); 1913 1914 if (msg->msg_name) 1915 unix_copy_addr(msg, skb->sk); 1916 1917 if (size > skb->len - skip) 1918 size = skb->len - skip; 1919 else if (size < skb->len - skip) 1920 msg->msg_flags |= MSG_TRUNC; 1921 1922 err = skb_copy_datagram_msg(skb, skip, msg, size); 1923 if (err) 1924 goto out_free; 1925 1926 if (sock_flag(sk, SOCK_RCVTSTAMP)) 1927 __sock_recv_timestamp(msg, sk, skb); 1928 1929 memset(&scm, 0, sizeof(scm)); 1930 1931 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 1932 unix_set_secdata(&scm, skb); 1933 1934 if (!(flags & MSG_PEEK)) { 1935 if (UNIXCB(skb).fp) 1936 unix_detach_fds(&scm, skb); 1937 1938 sk_peek_offset_bwd(sk, skb->len); 1939 } else { 1940 /* It is questionable: on PEEK we could: 1941 - do not return fds - good, but too simple 8) 1942 - return fds, and do not return them on read (old strategy, 1943 apparently wrong) 1944 - clone fds (I chose it for now, it is the most universal 1945 solution) 1946 1947 POSIX 1003.1g does not actually define this clearly 1948 at all. POSIX 1003.1g doesn't define a lot of things 1949 clearly however! 1950 1951 */ 1952 1953 sk_peek_offset_fwd(sk, size); 1954 1955 if (UNIXCB(skb).fp) 1956 scm.fp = scm_fp_dup(UNIXCB(skb).fp); 1957 } 1958 err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; 1959 1960 scm_recv(sock, msg, &scm, flags); 1961 1962 out_free: 1963 skb_free_datagram(sk, skb); 1964 out_unlock: 1965 mutex_unlock(&u->readlock); 1966 out: 1967 return err; 1968 } 1969 1970 /* 1971 * Sleep until more data has arrived. But check for races.. 1972 */ 1973 static long unix_stream_data_wait(struct sock *sk, long timeo, 1974 struct sk_buff *last, unsigned int last_len) 1975 { 1976 struct sk_buff *tail; 1977 DEFINE_WAIT(wait); 1978 1979 unix_state_lock(sk); 1980 1981 for (;;) { 1982 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1983 1984 tail = skb_peek_tail(&sk->sk_receive_queue); 1985 if (tail != last || 1986 (tail && tail->len != last_len) || 1987 sk->sk_err || 1988 (sk->sk_shutdown & RCV_SHUTDOWN) || 1989 signal_pending(current) || 1990 !timeo) 1991 break; 1992 1993 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1994 unix_state_unlock(sk); 1995 timeo = freezable_schedule_timeout(timeo); 1996 unix_state_lock(sk); 1997 1998 if (sock_flag(sk, SOCK_DEAD)) 1999 break; 2000 2001 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2002 } 2003 2004 finish_wait(sk_sleep(sk), &wait); 2005 unix_state_unlock(sk); 2006 return timeo; 2007 } 2008 2009 static unsigned int unix_skb_len(const struct sk_buff *skb) 2010 { 2011 return skb->len - UNIXCB(skb).consumed; 2012 } 2013 2014 struct unix_stream_read_state { 2015 int (*recv_actor)(struct sk_buff *, int, int, 2016 struct unix_stream_read_state *); 2017 struct socket *socket; 2018 struct msghdr *msg; 2019 struct pipe_inode_info *pipe; 2020 size_t size; 2021 int flags; 2022 unsigned int splice_flags; 2023 }; 2024 2025 static int unix_stream_read_generic(struct unix_stream_read_state *state) 2026 { 2027 struct scm_cookie scm; 2028 struct socket *sock = state->socket; 2029 struct sock *sk = sock->sk; 2030 struct unix_sock *u = unix_sk(sk); 2031 int copied = 0; 2032 int flags = state->flags; 2033 int noblock = flags & MSG_DONTWAIT; 2034 bool check_creds = false; 2035 int 
target; 2036 int err = 0; 2037 long timeo; 2038 int skip; 2039 size_t size = state->size; 2040 unsigned int last_len; 2041 2042 err = -EINVAL; 2043 if (sk->sk_state != TCP_ESTABLISHED) 2044 goto out; 2045 2046 err = -EOPNOTSUPP; 2047 if (flags & MSG_OOB) 2048 goto out; 2049 2050 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2051 timeo = sock_rcvtimeo(sk, noblock); 2052 2053 memset(&scm, 0, sizeof(scm)); 2054 2055 /* Lock the socket to prevent queue disordering 2056 * while sleeps in memcpy_tomsg 2057 */ 2058 err = mutex_lock_interruptible(&u->readlock); 2059 if (unlikely(err)) { 2060 /* recvmsg() in non blocking mode is supposed to return -EAGAIN 2061 * sk_rcvtimeo is not honored by mutex_lock_interruptible() 2062 */ 2063 err = noblock ? -EAGAIN : -ERESTARTSYS; 2064 goto out; 2065 } 2066 2067 do { 2068 int chunk; 2069 struct sk_buff *skb, *last; 2070 2071 unix_state_lock(sk); 2072 if (sock_flag(sk, SOCK_DEAD)) { 2073 err = -ECONNRESET; 2074 goto unlock; 2075 } 2076 last = skb = skb_peek(&sk->sk_receive_queue); 2077 last_len = last ? last->len : 0; 2078 again: 2079 if (skb == NULL) { 2080 unix_sk(sk)->recursion_level = 0; 2081 if (copied >= target) 2082 goto unlock; 2083 2084 /* 2085 * POSIX 1003.1g mandates this order. 
2086 */ 2087 2088 err = sock_error(sk); 2089 if (err) 2090 goto unlock; 2091 if (sk->sk_shutdown & RCV_SHUTDOWN) 2092 goto unlock; 2093 2094 unix_state_unlock(sk); 2095 err = -EAGAIN; 2096 if (!timeo) 2097 break; 2098 mutex_unlock(&u->readlock); 2099 2100 timeo = unix_stream_data_wait(sk, timeo, last, 2101 last_len); 2102 2103 if (signal_pending(current) || 2104 mutex_lock_interruptible(&u->readlock)) { 2105 err = sock_intr_errno(timeo); 2106 goto out; 2107 } 2108 2109 continue; 2110 unlock: 2111 unix_state_unlock(sk); 2112 break; 2113 } 2114 2115 skip = sk_peek_offset(sk, flags); 2116 while (skip >= unix_skb_len(skb)) { 2117 skip -= unix_skb_len(skb); 2118 last = skb; 2119 last_len = skb->len; 2120 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2121 if (!skb) 2122 goto again; 2123 } 2124 2125 unix_state_unlock(sk); 2126 2127 if (check_creds) { 2128 /* Never glue messages from different writers */ 2129 if ((UNIXCB(skb).pid != scm.pid) || 2130 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || 2131 !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || 2132 !unix_secdata_eq(&scm, skb)) 2133 break; 2134 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2135 /* Copy credentials */ 2136 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2137 unix_set_secdata(&scm, skb); 2138 check_creds = true; 2139 } 2140 2141 /* Copy address just once */ 2142 if (state->msg && state->msg->msg_name) { 2143 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2144 state->msg->msg_name); 2145 unix_copy_addr(state->msg, skb->sk); 2146 sunaddr = NULL; 2147 } 2148 2149 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2150 chunk = state->recv_actor(skb, skip, chunk, state); 2151 if (chunk < 0) { 2152 if (copied == 0) 2153 copied = -EFAULT; 2154 break; 2155 } 2156 copied += chunk; 2157 size -= chunk; 2158 2159 /* Mark read part of skb as used */ 2160 if (!(flags & MSG_PEEK)) { 2161 UNIXCB(skb).consumed += chunk; 2162 2163 sk_peek_offset_bwd(sk, chunk); 2164 2165 if 
(UNIXCB(skb).fp) 2166 unix_detach_fds(&scm, skb); 2167 2168 if (unix_skb_len(skb)) 2169 break; 2170 2171 skb_unlink(skb, &sk->sk_receive_queue); 2172 consume_skb(skb); 2173 2174 if (scm.fp) 2175 break; 2176 } else { 2177 /* It is questionable, see note in unix_dgram_recvmsg. 2178 */ 2179 if (UNIXCB(skb).fp) 2180 scm.fp = scm_fp_dup(UNIXCB(skb).fp); 2181 2182 sk_peek_offset_fwd(sk, chunk); 2183 2184 break; 2185 } 2186 } while (size); 2187 2188 mutex_unlock(&u->readlock); 2189 if (state->msg) 2190 scm_recv(sock, state->msg, &scm, flags); 2191 else 2192 scm_destroy(&scm); 2193 out: 2194 return copied ? : err; 2195 } 2196 2197 static int unix_stream_read_actor(struct sk_buff *skb, 2198 int skip, int chunk, 2199 struct unix_stream_read_state *state) 2200 { 2201 int ret; 2202 2203 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2204 state->msg, chunk); 2205 return ret ?: chunk; 2206 } 2207 2208 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2209 size_t size, int flags) 2210 { 2211 struct unix_stream_read_state state = { 2212 .recv_actor = unix_stream_read_actor, 2213 .socket = sock, 2214 .msg = msg, 2215 .size = size, 2216 .flags = flags 2217 }; 2218 2219 return unix_stream_read_generic(&state); 2220 } 2221 2222 static ssize_t skb_unix_socket_splice(struct sock *sk, 2223 struct pipe_inode_info *pipe, 2224 struct splice_pipe_desc *spd) 2225 { 2226 int ret; 2227 struct unix_sock *u = unix_sk(sk); 2228 2229 mutex_unlock(&u->readlock); 2230 ret = splice_to_pipe(pipe, spd); 2231 mutex_lock(&u->readlock); 2232 2233 return ret; 2234 } 2235 2236 static int unix_stream_splice_actor(struct sk_buff *skb, 2237 int skip, int chunk, 2238 struct unix_stream_read_state *state) 2239 { 2240 return skb_splice_bits(skb, state->socket->sk, 2241 UNIXCB(skb).consumed + skip, 2242 state->pipe, chunk, state->splice_flags, 2243 skb_unix_socket_splice); 2244 } 2245 2246 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2247 struct 
pipe_inode_info *pipe, 2248 size_t size, unsigned int flags) 2249 { 2250 struct unix_stream_read_state state = { 2251 .recv_actor = unix_stream_splice_actor, 2252 .socket = sock, 2253 .pipe = pipe, 2254 .size = size, 2255 .splice_flags = flags, 2256 }; 2257 2258 if (unlikely(*ppos)) 2259 return -ESPIPE; 2260 2261 if (sock->file->f_flags & O_NONBLOCK || 2262 flags & SPLICE_F_NONBLOCK) 2263 state.flags = MSG_DONTWAIT; 2264 2265 return unix_stream_read_generic(&state); 2266 } 2267 2268 static int unix_shutdown(struct socket *sock, int mode) 2269 { 2270 struct sock *sk = sock->sk; 2271 struct sock *other; 2272 2273 if (mode < SHUT_RD || mode > SHUT_RDWR) 2274 return -EINVAL; 2275 /* This maps: 2276 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2277 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 2278 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2279 */ 2280 ++mode; 2281 2282 unix_state_lock(sk); 2283 sk->sk_shutdown |= mode; 2284 other = unix_peer(sk); 2285 if (other) 2286 sock_hold(other); 2287 unix_state_unlock(sk); 2288 sk->sk_state_change(sk); 2289 2290 if (other && 2291 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2292 2293 int peer_mode = 0; 2294 2295 if (mode&RCV_SHUTDOWN) 2296 peer_mode |= SEND_SHUTDOWN; 2297 if (mode&SEND_SHUTDOWN) 2298 peer_mode |= RCV_SHUTDOWN; 2299 unix_state_lock(other); 2300 other->sk_shutdown |= peer_mode; 2301 unix_state_unlock(other); 2302 other->sk_state_change(other); 2303 if (peer_mode == SHUTDOWN_MASK) 2304 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2305 else if (peer_mode & RCV_SHUTDOWN) 2306 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2307 } 2308 if (other) 2309 sock_put(other); 2310 2311 return 0; 2312 } 2313 2314 long unix_inq_len(struct sock *sk) 2315 { 2316 struct sk_buff *skb; 2317 long amount = 0; 2318 2319 if (sk->sk_state == TCP_LISTEN) 2320 return -EINVAL; 2321 2322 spin_lock(&sk->sk_receive_queue.lock); 2323 if (sk->sk_type == SOCK_STREAM || 2324 sk->sk_type == SOCK_SEQPACKET) { 2325 skb_queue_walk(&sk->sk_receive_queue, 
skb) 2326 amount += unix_skb_len(skb); 2327 } else { 2328 skb = skb_peek(&sk->sk_receive_queue); 2329 if (skb) 2330 amount = skb->len; 2331 } 2332 spin_unlock(&sk->sk_receive_queue.lock); 2333 2334 return amount; 2335 } 2336 EXPORT_SYMBOL_GPL(unix_inq_len); 2337 2338 long unix_outq_len(struct sock *sk) 2339 { 2340 return sk_wmem_alloc_get(sk); 2341 } 2342 EXPORT_SYMBOL_GPL(unix_outq_len); 2343 2344 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 2345 { 2346 struct sock *sk = sock->sk; 2347 long amount = 0; 2348 int err; 2349 2350 switch (cmd) { 2351 case SIOCOUTQ: 2352 amount = unix_outq_len(sk); 2353 err = put_user(amount, (int __user *)arg); 2354 break; 2355 case SIOCINQ: 2356 amount = unix_inq_len(sk); 2357 if (amount < 0) 2358 err = amount; 2359 else 2360 err = put_user(amount, (int __user *)arg); 2361 break; 2362 default: 2363 err = -ENOIOCTLCMD; 2364 break; 2365 } 2366 return err; 2367 } 2368 2369 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) 2370 { 2371 struct sock *sk = sock->sk; 2372 unsigned int mask; 2373 2374 sock_poll_wait(file, sk_sleep(sk), wait); 2375 mask = 0; 2376 2377 /* exceptional events? */ 2378 if (sk->sk_err) 2379 mask |= POLLERR; 2380 if (sk->sk_shutdown == SHUTDOWN_MASK) 2381 mask |= POLLHUP; 2382 if (sk->sk_shutdown & RCV_SHUTDOWN) 2383 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2384 2385 /* readable? */ 2386 if (!skb_queue_empty(&sk->sk_receive_queue)) 2387 mask |= POLLIN | POLLRDNORM; 2388 2389 /* Connection-based need to check for termination and startup */ 2390 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 2391 sk->sk_state == TCP_CLOSE) 2392 mask |= POLLHUP; 2393 2394 /* 2395 * we set writable also when the other side has shut down the 2396 * connection. This prevents stuck sockets. 
2397 */ 2398 if (unix_writable(sk)) 2399 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2400 2401 return mask; 2402 } 2403 2404 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, 2405 poll_table *wait) 2406 { 2407 struct sock *sk = sock->sk, *other; 2408 unsigned int mask, writable; 2409 2410 sock_poll_wait(file, sk_sleep(sk), wait); 2411 mask = 0; 2412 2413 /* exceptional events? */ 2414 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 2415 mask |= POLLERR | 2416 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); 2417 2418 if (sk->sk_shutdown & RCV_SHUTDOWN) 2419 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2420 if (sk->sk_shutdown == SHUTDOWN_MASK) 2421 mask |= POLLHUP; 2422 2423 /* readable? */ 2424 if (!skb_queue_empty(&sk->sk_receive_queue)) 2425 mask |= POLLIN | POLLRDNORM; 2426 2427 /* Connection-based need to check for termination and startup */ 2428 if (sk->sk_type == SOCK_SEQPACKET) { 2429 if (sk->sk_state == TCP_CLOSE) 2430 mask |= POLLHUP; 2431 /* connection hasn't started yet? */ 2432 if (sk->sk_state == TCP_SYN_SENT) 2433 return mask; 2434 } 2435 2436 /* No write status requested, avoid expensive OUT tests. 
*/ 2437 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) 2438 return mask; 2439 2440 writable = unix_writable(sk); 2441 other = unix_peer_get(sk); 2442 if (other) { 2443 if (unix_peer(other) != sk) { 2444 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); 2445 if (unix_recvq_full(other)) 2446 writable = 0; 2447 } 2448 sock_put(other); 2449 } 2450 2451 if (writable) 2452 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2453 else 2454 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 2455 2456 return mask; 2457 } 2458 2459 #ifdef CONFIG_PROC_FS 2460 2461 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 2462 2463 #define get_bucket(x) ((x) >> BUCKET_SPACE) 2464 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) 2465 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 2466 2467 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 2468 { 2469 unsigned long offset = get_offset(*pos); 2470 unsigned long bucket = get_bucket(*pos); 2471 struct sock *sk; 2472 unsigned long count = 0; 2473 2474 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { 2475 if (sock_net(sk) != seq_file_net(seq)) 2476 continue; 2477 if (++count == offset) 2478 break; 2479 } 2480 2481 return sk; 2482 } 2483 2484 static struct sock *unix_next_socket(struct seq_file *seq, 2485 struct sock *sk, 2486 loff_t *pos) 2487 { 2488 unsigned long bucket; 2489 2490 while (sk > (struct sock *)SEQ_START_TOKEN) { 2491 sk = sk_next(sk); 2492 if (!sk) 2493 goto next_bucket; 2494 if (sock_net(sk) == seq_file_net(seq)) 2495 return sk; 2496 } 2497 2498 do { 2499 sk = unix_from_bucket(seq, pos); 2500 if (sk) 2501 return sk; 2502 2503 next_bucket: 2504 bucket = get_bucket(*pos) + 1; 2505 *pos = set_bucket_offset(bucket, 1); 2506 } while (bucket < ARRAY_SIZE(unix_socket_table)); 2507 2508 return NULL; 2509 } 2510 2511 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 2512 __acquires(unix_table_lock) 2513 { 2514 
spin_lock(&unix_table_lock); 2515 2516 if (!*pos) 2517 return SEQ_START_TOKEN; 2518 2519 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) 2520 return NULL; 2521 2522 return unix_next_socket(seq, NULL, pos); 2523 } 2524 2525 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2526 { 2527 ++*pos; 2528 return unix_next_socket(seq, v, pos); 2529 } 2530 2531 static void unix_seq_stop(struct seq_file *seq, void *v) 2532 __releases(unix_table_lock) 2533 { 2534 spin_unlock(&unix_table_lock); 2535 } 2536 2537 static int unix_seq_show(struct seq_file *seq, void *v) 2538 { 2539 2540 if (v == SEQ_START_TOKEN) 2541 seq_puts(seq, "Num RefCount Protocol Flags Type St " 2542 "Inode Path\n"); 2543 else { 2544 struct sock *s = v; 2545 struct unix_sock *u = unix_sk(s); 2546 unix_state_lock(s); 2547 2548 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 2549 s, 2550 atomic_read(&s->sk_refcnt), 2551 0, 2552 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 2553 s->sk_type, 2554 s->sk_socket ? 2555 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 2556 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 2557 sock_i_ino(s)); 2558 2559 if (u->addr) { 2560 int i, len; 2561 seq_putc(seq, ' '); 2562 2563 i = 0; 2564 len = u->addr->len - sizeof(short); 2565 if (!UNIX_ABSTRACT(s)) 2566 len--; 2567 else { 2568 seq_putc(seq, '@'); 2569 i++; 2570 } 2571 for ( ; i < len; i++) 2572 seq_putc(seq, u->addr->name->sun_path[i]); 2573 } 2574 unix_state_unlock(s); 2575 seq_putc(seq, '\n'); 2576 } 2577 2578 return 0; 2579 } 2580 2581 static const struct seq_operations unix_seq_ops = { 2582 .start = unix_seq_start, 2583 .next = unix_seq_next, 2584 .stop = unix_seq_stop, 2585 .show = unix_seq_show, 2586 }; 2587 2588 static int unix_seq_open(struct inode *inode, struct file *file) 2589 { 2590 return seq_open_net(inode, file, &unix_seq_ops, 2591 sizeof(struct seq_net_private)); 2592 } 2593 2594 static const struct file_operations unix_seq_fops = { 2595 .owner = THIS_MODULE, 2596 .open = unix_seq_open, 2597 .read = seq_read, 2598 .llseek = seq_lseek, 2599 .release = seq_release_net, 2600 }; 2601 2602 #endif 2603 2604 static const struct net_proto_family unix_family_ops = { 2605 .family = PF_UNIX, 2606 .create = unix_create, 2607 .owner = THIS_MODULE, 2608 }; 2609 2610 2611 static int __net_init unix_net_init(struct net *net) 2612 { 2613 int error = -ENOMEM; 2614 2615 net->unx.sysctl_max_dgram_qlen = 10; 2616 if (unix_sysctl_register(net)) 2617 goto out; 2618 2619 #ifdef CONFIG_PROC_FS 2620 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) { 2621 unix_sysctl_unregister(net); 2622 goto out; 2623 } 2624 #endif 2625 error = 0; 2626 out: 2627 return error; 2628 } 2629 2630 static void __net_exit unix_net_exit(struct net *net) 2631 { 2632 unix_sysctl_unregister(net); 2633 remove_proc_entry("unix", net->proc_net); 2634 } 2635 2636 static struct pernet_operations unix_net_ops = { 2637 .init = unix_net_init, 2638 .exit = unix_net_exit, 2639 }; 2640 2641 static int __init af_unix_init(void) 2642 { 2643 int rc = -1; 2644 2645 BUILD_BUG_ON(sizeof(struct 
unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); 2646 2647 rc = proto_register(&unix_proto, 1); 2648 if (rc != 0) { 2649 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 2650 goto out; 2651 } 2652 2653 sock_register(&unix_family_ops); 2654 register_pernet_subsys(&unix_net_ops); 2655 out: 2656 return rc; 2657 } 2658 2659 static void __exit af_unix_exit(void) 2660 { 2661 sock_unregister(PF_UNIX); 2662 proto_unregister(&unix_proto); 2663 unregister_pernet_subsys(&unix_net_ops); 2664 } 2665 2666 /* Earlier than device_initcall() so that other drivers invoking 2667 request_module() don't end up in a loop when modprobe tries 2668 to use a UNIX socket. But later than subsys_initcall() because 2669 we depend on stuff initialised there */ 2670 fs_initcall(af_unix_init); 2671 module_exit(af_unix_exit); 2672 2673 MODULE_LICENSE("GPL"); 2674 MODULE_ALIAS_NETPROTO(PF_UNIX); 2675