/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan.cox@linux.org>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of hashed socks (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP.
Lot of bugs are introduced 8) 48 * Malcolm Beattie : Set peercred for socketpair 49 * Michal Ostrowski : Module initialization cleanup. 50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 51 * the core infrastructure is doing that 52 * for all net proto families now (2.5.69+) 53 * 54 * 55 * Known differences from reference BSD that was tested: 56 * 57 * [TO FIX] 58 * ECONNREFUSED is not returned from one end of a connected() socket to the 59 * other the moment one end closes. 60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 61 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 62 * [NOT TO FIX] 63 * accept() returns a path name even if the connecting socket has closed 64 * in the meantime (BSD loses the path and gives up). 65 * accept() returns 0 length path for an unbound connector. BSD returns 16 66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 67 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 68 * BSD af_unix apparently has connect forgetting to block properly. 69 * (need to check this with the POSIX spec in detail) 70 * 71 * Differences from 2.0.0-11-... (ANK) 72 * Bug fixes and improvements. 73 * - client shutdown killed server socket. 74 * - removed all useless cli/sti pairs. 75 * 76 * Semantic changes/extensions. 77 * - generic control message passing. 78 * - SCM_CREDENTIALS control message. 79 * - "Abstract" (not FS based) socket bindings. 80 * Abstract names are sequences of bytes (not zero terminated) 81 * started by 0, so that this name space does not intersect 82 * with BSD names. 
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

/*
 * Initial sk_max_ack_backlog given to every socket by unix_create1();
 * unix_listen() later replaces it with the caller's backlog.
 */
int sysctl_unix_max_dgram_qlen __read_mostly = 10;

/*
 * Hash table of all AF_UNIX sockets, guarded by unix_table_lock.
 * It has one bucket more than UNIX_HASH_SIZE: the final bucket
 * (unix_sockets_unbound) holds sockets that have no address yet.
 */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
DEFINE_SPINLOCK(unix_table_lock);

/*
 * Number of live unix sockets: bumped in unix_create1(), dropped in
 * unix_sock_destructor(), and capped at 2*get_max_files() on creation.
 */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Bucket for sockets that have not been bound to any name. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/*
 * True when the socket's address is an abstract name.  unix_bind() stores
 * UNIX_HASH_SIZE in addr->hash for filesystem names, so any other value
 * means abstract.  Dereferences ->addr unconditionally: only use on a
 * socket that is already bound.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
/*
 * Note the direction, which the name does not make obvious: this copies
 * the security id FROM the scm cookie INTO the skb's UNIXSID() slot.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* The reverse: copies the security id stored in the skb into the cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket
state is protected by separate rwlock. 151 */ 152 153 static inline unsigned unix_hash_fold(__wsum n) 154 { 155 unsigned hash = (__force unsigned)n; 156 hash ^= hash>>16; 157 hash ^= hash>>8; 158 return hash&(UNIX_HASH_SIZE-1); 159 } 160 161 #define unix_peer(sk) (unix_sk(sk)->peer) 162 163 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 164 { 165 return unix_peer(osk) == sk; 166 } 167 168 static inline int unix_may_send(struct sock *sk, struct sock *osk) 169 { 170 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); 171 } 172 173 static struct sock *unix_peer_get(struct sock *s) 174 { 175 struct sock *peer; 176 177 unix_state_rlock(s); 178 peer = unix_peer(s); 179 if (peer) 180 sock_hold(peer); 181 unix_state_runlock(s); 182 return peer; 183 } 184 185 static inline void unix_release_addr(struct unix_address *addr) 186 { 187 if (atomic_dec_and_test(&addr->refcnt)) 188 kfree(addr); 189 } 190 191 /* 192 * Check unix socket name: 193 * - should be not zero length. 194 * - if started by not zero, should be NULL terminated (FS object) 195 * - if started by zero, it is abstract name. 196 */ 197 198 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) 199 { 200 if (len <= sizeof(short) || len > sizeof(*sunaddr)) 201 return -EINVAL; 202 if (!sunaddr || sunaddr->sun_family != AF_UNIX) 203 return -EINVAL; 204 if (sunaddr->sun_path[0]) { 205 /* 206 * This may look like an off by one error but it is a bit more 207 * subtle. 108 is the longest valid AF_UNIX path for a binding. 208 * sun_path[108] doesnt as such exist. However in kernel space 209 * we are guaranteed that it is a valid memory location in our 210 * kernel address buffer. 
211 */ 212 ((char *)sunaddr)[len]=0; 213 len = strlen(sunaddr->sun_path)+1+sizeof(short); 214 return len; 215 } 216 217 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0)); 218 return len; 219 } 220 221 static void __unix_remove_socket(struct sock *sk) 222 { 223 sk_del_node_init(sk); 224 } 225 226 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) 227 { 228 BUG_TRAP(sk_unhashed(sk)); 229 sk_add_node(sk, list); 230 } 231 232 static inline void unix_remove_socket(struct sock *sk) 233 { 234 spin_lock(&unix_table_lock); 235 __unix_remove_socket(sk); 236 spin_unlock(&unix_table_lock); 237 } 238 239 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) 240 { 241 spin_lock(&unix_table_lock); 242 __unix_insert_socket(list, sk); 243 spin_unlock(&unix_table_lock); 244 } 245 246 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, 247 int len, int type, unsigned hash) 248 { 249 struct sock *s; 250 struct hlist_node *node; 251 252 sk_for_each(s, node, &unix_socket_table[hash ^ type]) { 253 struct unix_sock *u = unix_sk(s); 254 255 if (u->addr->len == len && 256 !memcmp(u->addr->name, sunname, len)) 257 goto found; 258 } 259 s = NULL; 260 found: 261 return s; 262 } 263 264 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, 265 int len, int type, 266 unsigned hash) 267 { 268 struct sock *s; 269 270 spin_lock(&unix_table_lock); 271 s = __unix_find_socket_byname(sunname, len, type, hash); 272 if (s) 273 sock_hold(s); 274 spin_unlock(&unix_table_lock); 275 return s; 276 } 277 278 static struct sock *unix_find_socket_byinode(struct inode *i) 279 { 280 struct sock *s; 281 struct hlist_node *node; 282 283 spin_lock(&unix_table_lock); 284 sk_for_each(s, node, 285 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 286 struct dentry *dentry = unix_sk(s)->dentry; 287 288 if(dentry && dentry->d_inode == i) 289 { 290 sock_hold(s); 291 goto found; 292 } 293 } 294 s = NULL; 295 
found: 296 spin_unlock(&unix_table_lock); 297 return s; 298 } 299 300 static inline int unix_writable(struct sock *sk) 301 { 302 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 303 } 304 305 static void unix_write_space(struct sock *sk) 306 { 307 read_lock(&sk->sk_callback_lock); 308 if (unix_writable(sk)) { 309 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 310 wake_up_interruptible(sk->sk_sleep); 311 sk_wake_async(sk, 2, POLL_OUT); 312 } 313 read_unlock(&sk->sk_callback_lock); 314 } 315 316 /* When dgram socket disconnects (or changes its peer), we clear its receive 317 * queue of packets arrived from previous peer. First, it allows to do 318 * flow control based only on wmem_alloc; second, sk connected to peer 319 * may receive messages only from that peer. */ 320 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 321 { 322 if (!skb_queue_empty(&sk->sk_receive_queue)) { 323 skb_queue_purge(&sk->sk_receive_queue); 324 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 325 326 /* If one link of bidirectional dgram pipe is disconnected, 327 * we signal error. Messages are lost. Do not make this, 328 * when peer was not connected to us. 
329 */ 330 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 331 other->sk_err = ECONNRESET; 332 other->sk_error_report(other); 333 } 334 } 335 } 336 337 static void unix_sock_destructor(struct sock *sk) 338 { 339 struct unix_sock *u = unix_sk(sk); 340 341 skb_queue_purge(&sk->sk_receive_queue); 342 343 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); 344 BUG_TRAP(sk_unhashed(sk)); 345 BUG_TRAP(!sk->sk_socket); 346 if (!sock_flag(sk, SOCK_DEAD)) { 347 printk("Attempt to release alive unix socket: %p\n", sk); 348 return; 349 } 350 351 if (u->addr) 352 unix_release_addr(u->addr); 353 354 atomic_dec(&unix_nr_socks); 355 #ifdef UNIX_REFCNT_DEBUG 356 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks)); 357 #endif 358 } 359 360 static int unix_release_sock (struct sock *sk, int embrion) 361 { 362 struct unix_sock *u = unix_sk(sk); 363 struct dentry *dentry; 364 struct vfsmount *mnt; 365 struct sock *skpair; 366 struct sk_buff *skb; 367 int state; 368 369 unix_remove_socket(sk); 370 371 /* Clear state */ 372 unix_state_wlock(sk); 373 sock_orphan(sk); 374 sk->sk_shutdown = SHUTDOWN_MASK; 375 dentry = u->dentry; 376 u->dentry = NULL; 377 mnt = u->mnt; 378 u->mnt = NULL; 379 state = sk->sk_state; 380 sk->sk_state = TCP_CLOSE; 381 unix_state_wunlock(sk); 382 383 wake_up_interruptible_all(&u->peer_wait); 384 385 skpair=unix_peer(sk); 386 387 if (skpair!=NULL) { 388 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 389 unix_state_wlock(skpair); 390 /* No more writes */ 391 skpair->sk_shutdown = SHUTDOWN_MASK; 392 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 393 skpair->sk_err = ECONNRESET; 394 unix_state_wunlock(skpair); 395 skpair->sk_state_change(skpair); 396 read_lock(&skpair->sk_callback_lock); 397 sk_wake_async(skpair,1,POLL_HUP); 398 read_unlock(&skpair->sk_callback_lock); 399 } 400 sock_put(skpair); /* It may now die */ 401 unix_peer(sk) = NULL; 402 } 403 404 /* Try to flush out this socket. 
	   Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/*
		 * On a listener every queued skb stands for a pending
		 * connection whose embryo socket is skb->sk; release it too.
		 */
		if (state==TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	/* Drop the filesystem references taken over from the socket above. */
	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (atomic_read(&unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */

	return 0;
}

/*
 * listen(2) for stream and seqpacket sockets.
 *
 * Returns -EOPNOTSUPP for other socket types, -EINVAL when the socket is
 * unbound or is neither closed nor already listening, and 0 on success.
 * Calling it again on a listening socket just updates the backlog.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out;			/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;			/* No listens on an unbound socket */
	unix_state_wlock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/*
	 * The backlog is growing: wake everybody sleeping on peer_wait
	 * (connectors blocked in unix_wait_for_peer() wait there) so they
	 * can re-check the queue length.
	 */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= current->tgid;
	sk->sk_peercred.uid	= current->euid;
	sk->sk_peercred.gid	= current->egid;
	err = 0;

out_unlock:
	unix_state_wunlock(sk);
out:
	return err;
}

/* Forward declarations for the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

/* Operations for SOCK_STREAM sockets (selected in unix_create()). */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

/*
 * Operations for SOCK_DGRAM sockets (SOCK_RAW is mapped onto these in
 * unix_create()).  accept and listen are the sock_no_* stubs.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

/*
 * Operations for SOCK_SEQPACKET sockets: connection handling is shared
 * with the stream ops, while poll and recvmsg are the datagram ones.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

/* Protocol descriptor handed to sk_alloc(); sizes each sock as a unix_sock. */
static struct proto unix_proto = {
	.name	  = "UNIX",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock.
Split off 566 * this special lock-class by reinitializing the spinlock key: 567 */ 568 static struct lock_class_key af_unix_sk_receive_queue_lock_key; 569 570 static struct sock * unix_create1(struct socket *sock) 571 { 572 struct sock *sk = NULL; 573 struct unix_sock *u; 574 575 if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) 576 goto out; 577 578 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); 579 if (!sk) 580 goto out; 581 582 atomic_inc(&unix_nr_socks); 583 584 sock_init_data(sock,sk); 585 lockdep_set_class(&sk->sk_receive_queue.lock, 586 &af_unix_sk_receive_queue_lock_key); 587 588 sk->sk_write_space = unix_write_space; 589 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; 590 sk->sk_destruct = unix_sock_destructor; 591 u = unix_sk(sk); 592 u->dentry = NULL; 593 u->mnt = NULL; 594 spin_lock_init(&u->lock); 595 atomic_set(&u->inflight, sock ? 0 : -1); 596 mutex_init(&u->readlock); /* single task reading lock */ 597 init_waitqueue_head(&u->peer_wait); 598 unix_insert_socket(unix_sockets_unbound, sk); 599 out: 600 return sk; 601 } 602 603 static int unix_create(struct socket *sock, int protocol) 604 { 605 if (protocol && protocol != PF_UNIX) 606 return -EPROTONOSUPPORT; 607 608 sock->state = SS_UNCONNECTED; 609 610 switch (sock->type) { 611 case SOCK_STREAM: 612 sock->ops = &unix_stream_ops; 613 break; 614 /* 615 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 616 * nothing uses it. 617 */ 618 case SOCK_RAW: 619 sock->type=SOCK_DGRAM; 620 case SOCK_DGRAM: 621 sock->ops = &unix_dgram_ops; 622 break; 623 case SOCK_SEQPACKET: 624 sock->ops = &unix_seqpacket_ops; 625 break; 626 default: 627 return -ESOCKTNOSUPPORT; 628 } 629 630 return unix_create1(sock) ? 
0 : -ENOMEM; 631 } 632 633 static int unix_release(struct socket *sock) 634 { 635 struct sock *sk = sock->sk; 636 637 if (!sk) 638 return 0; 639 640 sock->sk = NULL; 641 642 return unix_release_sock (sk, 0); 643 } 644 645 static int unix_autobind(struct socket *sock) 646 { 647 struct sock *sk = sock->sk; 648 struct unix_sock *u = unix_sk(sk); 649 static u32 ordernum = 1; 650 struct unix_address * addr; 651 int err; 652 653 mutex_lock(&u->readlock); 654 655 err = 0; 656 if (u->addr) 657 goto out; 658 659 err = -ENOMEM; 660 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); 661 if (!addr) 662 goto out; 663 664 addr->name->sun_family = AF_UNIX; 665 atomic_set(&addr->refcnt, 1); 666 667 retry: 668 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 669 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); 670 671 spin_lock(&unix_table_lock); 672 ordernum = (ordernum+1)&0xFFFFF; 673 674 if (__unix_find_socket_byname(addr->name, addr->len, sock->type, 675 addr->hash)) { 676 spin_unlock(&unix_table_lock); 677 /* Sanity yield. It is unusual case, but yet... 
*/ 678 if (!(ordernum&0xFF)) 679 yield(); 680 goto retry; 681 } 682 addr->hash ^= sk->sk_type; 683 684 __unix_remove_socket(sk); 685 u->addr = addr; 686 __unix_insert_socket(&unix_socket_table[addr->hash], sk); 687 spin_unlock(&unix_table_lock); 688 err = 0; 689 690 out: mutex_unlock(&u->readlock); 691 return err; 692 } 693 694 static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, 695 int type, unsigned hash, int *error) 696 { 697 struct sock *u; 698 struct nameidata nd; 699 int err = 0; 700 701 if (sunname->sun_path[0]) { 702 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); 703 if (err) 704 goto fail; 705 err = vfs_permission(&nd, MAY_WRITE); 706 if (err) 707 goto put_fail; 708 709 err = -ECONNREFUSED; 710 if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) 711 goto put_fail; 712 u=unix_find_socket_byinode(nd.dentry->d_inode); 713 if (!u) 714 goto put_fail; 715 716 if (u->sk_type == type) 717 touch_atime(nd.mnt, nd.dentry); 718 719 path_release(&nd); 720 721 err=-EPROTOTYPE; 722 if (u->sk_type != type) { 723 sock_put(u); 724 goto fail; 725 } 726 } else { 727 err = -ECONNREFUSED; 728 u=unix_find_socket_byname(sunname, len, type, hash); 729 if (u) { 730 struct dentry *dentry; 731 dentry = unix_sk(u)->dentry; 732 if (dentry) 733 touch_atime(unix_sk(u)->mnt, dentry); 734 } else 735 goto fail; 736 } 737 return u; 738 739 put_fail: 740 path_release(&nd); 741 fail: 742 *error=err; 743 return NULL; 744 } 745 746 747 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 748 { 749 struct sock *sk = sock->sk; 750 struct unix_sock *u = unix_sk(sk); 751 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 752 struct dentry * dentry = NULL; 753 struct nameidata nd; 754 int err; 755 unsigned hash; 756 struct unix_address *addr; 757 struct hlist_head *list; 758 759 err = -EINVAL; 760 if (sunaddr->sun_family != AF_UNIX) 761 goto out; 762 763 if (addr_len==sizeof(short)) { 764 err = unix_autobind(sock); 765 goto out; 766 } 767 768 
	/* Validate the name; on success err is the length to bind with. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* u->readlock serialises binders of this socket (autobind takes it too). */
	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	/*
	 * Only meaningful for abstract names; for filesystem names the
	 * value is replaced with UNIX_HASH_SIZE below.
	 */
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
		if (err)
			goto out_mknod_dput;
		/*
		 * Done with the parent directory: unlock it, drop it, and let
		 * nd.dentry refer to the freshly created socket node instead.
		 */
		mutex_unlock(&nd.dentry->d_inode->i_mutex);
		dput(nd.dentry);
		nd.dentry = dentry;

		/* Marks the address as a filesystem name (see UNIX_ABSTRACT). */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract name: it must not be in use by the same socket type. */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/*
		 * Filesystem name: hash by inode number, which is what
		 * unix_find_socket_byinode() searches by.  The socket takes
		 * over the dentry/mnt references held in nd.
		 */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.dentry;
		u->mnt    = nd.mnt;
	}

	err = 0;
	/* Move the socket from its current bucket to the new one. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

	/* Error unwinding for the filesystem branch, innermost first. */
out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out_mknod_parent:
	/* An existing node at that path means the address is taken. */
	if (err==-EEXIST)
		err=-EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}

/*
 * connect(2) for datagram sockets.
 *
 * With an AF_UNSPEC address the current association is dissolved.
 * Otherwise the destination is looked up and becomes the new peer,
 * provided unix_may_send() and the security hook allow it (-EPERM or the
 * hook's error otherwise).  A socket with SOCK_PASSCRED set that has no
 * address yet is autobound first.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

		/* On success we now hold a reference on other. */
		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_wlock(sk);

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_wlock(sk);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk)=other;
		unix_state_wunlock(sk);

		/* Flush what the previous peer queued, unless it is unchanged. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		/* Drop the reference the old association was holding. */
		sock_put(old_peer);
	} else {
		/* The lookup reference on other (if any) is kept as the peer reference. */
		unix_peer(sk)=other;
		unix_state_wunlock(sk);
	}
 	return 0;

out_unlock:
	unix_state_wunlock(sk);
	sock_put(other);
out:
	return err;
}

/*
 * Sleep on the peer_wait queue of @other for up to @timeo while its
 * receive queue is longer than its backlog and it is neither dead nor
 * shut down for reading.
 *
 * Called with the state read lock of @other held; the lock is released
 * here, before sleeping, and is NOT re-taken.  Returns the remaining
 * timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	/* Queue ourselves before testing the condition so a wakeup is not missed. */
	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		(skb_queue_len(&other->sk_receive_queue) >
		 other->sk_max_ack_backlog);

	unix_state_runlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

/*
 * connect(2) for stream and seqpacket sockets.
 *
 * The server-side sock (newsk) and a one-byte skb that carries it are
 * allocated up front.  The listener is then looked up and, if its backlog
 * has room, both ends are wired together and the skb is queued on the
 * listener for unix_accept() to pick up.
 *
 * Returns 0 on success; otherwise an error from name validation, autobind
 * or lookup, -ENOMEM, -ECONNREFUSED (target is not listening), -EAGAIN
 * (backlog full and no timeout), the sock_intr_errno() value when
 * interrupted by a signal, -EISCONN, -EINVAL, or the security hook's error.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the read lock on other before sleeping. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		/* "out", not "out_unlock": the lock is already released. */
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock_nested(sk);

	/* The state changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	/* newsk's peer pointer holds a reference on sk. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	/* sk's peer pointer holds a reference on newsk. */
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_wunlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	/* Release whatever was allocated or referenced before the failure. */
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

/*
 * socketpair(2): make the two freshly created sockets each other's peer.
 * Both record the calling task's credentials as peer credentials, and
 * non-datagram pairs are additionally marked connected.  Always returns 0.
 */
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska=socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska)=skb;
	unix_peer(skb)=ska;
	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

/*
 * accept(2): take the next pending connection off the listener's receive
 * queue and graft its sock (skb->sk, created by unix_stream_connect())
 * onto @newsock.
 *
 * Returns -EOPNOTSUPP for socket types other than stream/seqpacket,
 * -EINVAL when the socket is not listening or no skb arrives without an
 * error code, or the error reported by skb_recv_datagram().
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown.
*/ 1171 if (err == 0) 1172 err = -EINVAL; 1173 goto out; 1174 } 1175 1176 tsk = skb->sk; 1177 skb_free_datagram(sk, skb); 1178 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1179 1180 /* attach accepted sock to socket */ 1181 unix_state_wlock(tsk); 1182 newsock->state = SS_CONNECTED; 1183 sock_graft(tsk, newsock); 1184 unix_state_wunlock(tsk); 1185 return 0; 1186 1187 out: 1188 return err; 1189 } 1190 1191 1192 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) 1193 { 1194 struct sock *sk = sock->sk; 1195 struct unix_sock *u; 1196 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 1197 int err = 0; 1198 1199 if (peer) { 1200 sk = unix_peer_get(sk); 1201 1202 err = -ENOTCONN; 1203 if (!sk) 1204 goto out; 1205 err = 0; 1206 } else { 1207 sock_hold(sk); 1208 } 1209 1210 u = unix_sk(sk); 1211 unix_state_rlock(sk); 1212 if (!u->addr) { 1213 sunaddr->sun_family = AF_UNIX; 1214 sunaddr->sun_path[0] = 0; 1215 *uaddr_len = sizeof(short); 1216 } else { 1217 struct unix_address *addr = u->addr; 1218 1219 *uaddr_len = addr->len; 1220 memcpy(sunaddr, addr->name, *uaddr_len); 1221 } 1222 unix_state_runlock(sk); 1223 sock_put(sk); 1224 out: 1225 return err; 1226 } 1227 1228 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1229 { 1230 int i; 1231 1232 scm->fp = UNIXCB(skb).fp; 1233 skb->destructor = sock_wfree; 1234 UNIXCB(skb).fp = NULL; 1235 1236 for (i=scm->fp->count-1; i>=0; i--) 1237 unix_notinflight(scm->fp->fp[i]); 1238 } 1239 1240 static void unix_destruct_fds(struct sk_buff *skb) 1241 { 1242 struct scm_cookie scm; 1243 memset(&scm, 0, sizeof(scm)); 1244 unix_detach_fds(&scm, skb); 1245 1246 /* Alas, it calls VFS */ 1247 /* So fscking what? 
fput() had been SMP-safe since the last Summer */ 1248 scm_destroy(&scm); 1249 sock_wfree(skb); 1250 } 1251 1252 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1253 { 1254 int i; 1255 for (i=scm->fp->count-1; i>=0; i--) 1256 unix_inflight(scm->fp->fp[i]); 1257 UNIXCB(skb).fp = scm->fp; 1258 skb->destructor = unix_destruct_fds; 1259 scm->fp = NULL; 1260 } 1261 1262 /* 1263 * Send AF_UNIX data. 1264 */ 1265 1266 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, 1267 struct msghdr *msg, size_t len) 1268 { 1269 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1270 struct sock *sk = sock->sk; 1271 struct unix_sock *u = unix_sk(sk); 1272 struct sockaddr_un *sunaddr=msg->msg_name; 1273 struct sock *other = NULL; 1274 int namelen = 0; /* fake GCC */ 1275 int err; 1276 unsigned hash; 1277 struct sk_buff *skb; 1278 long timeo; 1279 struct scm_cookie tmp_scm; 1280 1281 if (NULL == siocb->scm) 1282 siocb->scm = &tmp_scm; 1283 err = scm_send(sock, msg, siocb->scm); 1284 if (err < 0) 1285 return err; 1286 1287 err = -EOPNOTSUPP; 1288 if (msg->msg_flags&MSG_OOB) 1289 goto out; 1290 1291 if (msg->msg_namelen) { 1292 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1293 if (err < 0) 1294 goto out; 1295 namelen = err; 1296 } else { 1297 sunaddr = NULL; 1298 err = -ENOTCONN; 1299 other = unix_peer_get(sk); 1300 if (!other) 1301 goto out; 1302 } 1303 1304 if (test_bit(SOCK_PASSCRED, &sock->flags) 1305 && !u->addr && (err = unix_autobind(sock)) != 0) 1306 goto out; 1307 1308 err = -EMSGSIZE; 1309 if (len > sk->sk_sndbuf - 32) 1310 goto out; 1311 1312 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); 1313 if (skb==NULL) 1314 goto out; 1315 1316 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1317 if (siocb->scm->fp) 1318 unix_attach_fds(siocb->scm, skb); 1319 unix_get_secdata(siocb->scm, skb); 1320 1321 skb_reset_transport_header(skb); 1322 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, 
len); 1323 if (err) 1324 goto out_free; 1325 1326 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1327 1328 restart: 1329 if (!other) { 1330 err = -ECONNRESET; 1331 if (sunaddr == NULL) 1332 goto out_free; 1333 1334 other = unix_find_other(sunaddr, namelen, sk->sk_type, 1335 hash, &err); 1336 if (other==NULL) 1337 goto out_free; 1338 } 1339 1340 unix_state_rlock(other); 1341 err = -EPERM; 1342 if (!unix_may_send(sk, other)) 1343 goto out_unlock; 1344 1345 if (sock_flag(other, SOCK_DEAD)) { 1346 /* 1347 * Check with 1003.1g - what should 1348 * datagram error 1349 */ 1350 unix_state_runlock(other); 1351 sock_put(other); 1352 1353 err = 0; 1354 unix_state_wlock(sk); 1355 if (unix_peer(sk) == other) { 1356 unix_peer(sk)=NULL; 1357 unix_state_wunlock(sk); 1358 1359 unix_dgram_disconnected(sk, other); 1360 sock_put(other); 1361 err = -ECONNREFUSED; 1362 } else { 1363 unix_state_wunlock(sk); 1364 } 1365 1366 other = NULL; 1367 if (err) 1368 goto out_free; 1369 goto restart; 1370 } 1371 1372 err = -EPIPE; 1373 if (other->sk_shutdown & RCV_SHUTDOWN) 1374 goto out_unlock; 1375 1376 if (sk->sk_type != SOCK_SEQPACKET) { 1377 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1378 if (err) 1379 goto out_unlock; 1380 } 1381 1382 if (unix_peer(other) != sk && 1383 (skb_queue_len(&other->sk_receive_queue) > 1384 other->sk_max_ack_backlog)) { 1385 if (!timeo) { 1386 err = -EAGAIN; 1387 goto out_unlock; 1388 } 1389 1390 timeo = unix_wait_for_peer(other, timeo); 1391 1392 err = sock_intr_errno(timeo); 1393 if (signal_pending(current)) 1394 goto out_free; 1395 1396 goto restart; 1397 } 1398 1399 skb_queue_tail(&other->sk_receive_queue, skb); 1400 unix_state_runlock(other); 1401 other->sk_data_ready(other, len); 1402 sock_put(other); 1403 scm_destroy(siocb->scm); 1404 return len; 1405 1406 out_unlock: 1407 unix_state_runlock(other); 1408 out_free: 1409 kfree_skb(skb); 1410 out: 1411 if (other) 1412 sock_put(other); 1413 scm_destroy(siocb->scm); 1414 return err; 
1415 } 1416 1417 1418 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 1419 struct msghdr *msg, size_t len) 1420 { 1421 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1422 struct sock *sk = sock->sk; 1423 struct sock *other = NULL; 1424 struct sockaddr_un *sunaddr=msg->msg_name; 1425 int err,size; 1426 struct sk_buff *skb; 1427 int sent=0; 1428 struct scm_cookie tmp_scm; 1429 1430 if (NULL == siocb->scm) 1431 siocb->scm = &tmp_scm; 1432 err = scm_send(sock, msg, siocb->scm); 1433 if (err < 0) 1434 return err; 1435 1436 err = -EOPNOTSUPP; 1437 if (msg->msg_flags&MSG_OOB) 1438 goto out_err; 1439 1440 if (msg->msg_namelen) { 1441 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 1442 goto out_err; 1443 } else { 1444 sunaddr = NULL; 1445 err = -ENOTCONN; 1446 other = unix_peer(sk); 1447 if (!other) 1448 goto out_err; 1449 } 1450 1451 if (sk->sk_shutdown & SEND_SHUTDOWN) 1452 goto pipe_err; 1453 1454 while(sent < len) 1455 { 1456 /* 1457 * Optimisation for the fact that under 0.01% of X 1458 * messages typically need breaking up. 1459 */ 1460 1461 size = len-sent; 1462 1463 /* Keep two messages in the pipe so it schedules better */ 1464 if (size > ((sk->sk_sndbuf >> 1) - 64)) 1465 size = (sk->sk_sndbuf >> 1) - 64; 1466 1467 if (size > SKB_MAX_ALLOC) 1468 size = SKB_MAX_ALLOC; 1469 1470 /* 1471 * Grab a buffer 1472 */ 1473 1474 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); 1475 1476 if (skb==NULL) 1477 goto out_err; 1478 1479 /* 1480 * If you pass two values to the sock_alloc_send_skb 1481 * it tries to grab the large buffer with GFP_NOFS 1482 * (which can fail easily), and if it fails grab the 1483 * fallback size buffer which is under a page and will 1484 * succeed. 
[Alan] 1485 */ 1486 size = min_t(int, size, skb_tailroom(skb)); 1487 1488 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1489 if (siocb->scm->fp) 1490 unix_attach_fds(siocb->scm, skb); 1491 1492 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { 1493 kfree_skb(skb); 1494 goto out_err; 1495 } 1496 1497 unix_state_rlock(other); 1498 1499 if (sock_flag(other, SOCK_DEAD) || 1500 (other->sk_shutdown & RCV_SHUTDOWN)) 1501 goto pipe_err_free; 1502 1503 skb_queue_tail(&other->sk_receive_queue, skb); 1504 unix_state_runlock(other); 1505 other->sk_data_ready(other, size); 1506 sent+=size; 1507 } 1508 1509 scm_destroy(siocb->scm); 1510 siocb->scm = NULL; 1511 1512 return sent; 1513 1514 pipe_err_free: 1515 unix_state_runlock(other); 1516 kfree_skb(skb); 1517 pipe_err: 1518 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) 1519 send_sig(SIGPIPE,current,0); 1520 err = -EPIPE; 1521 out_err: 1522 scm_destroy(siocb->scm); 1523 siocb->scm = NULL; 1524 return sent ? 
: err; 1525 } 1526 1527 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, 1528 struct msghdr *msg, size_t len) 1529 { 1530 int err; 1531 struct sock *sk = sock->sk; 1532 1533 err = sock_error(sk); 1534 if (err) 1535 return err; 1536 1537 if (sk->sk_state != TCP_ESTABLISHED) 1538 return -ENOTCONN; 1539 1540 if (msg->msg_namelen) 1541 msg->msg_namelen = 0; 1542 1543 return unix_dgram_sendmsg(kiocb, sock, msg, len); 1544 } 1545 1546 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 1547 { 1548 struct unix_sock *u = unix_sk(sk); 1549 1550 msg->msg_namelen = 0; 1551 if (u->addr) { 1552 msg->msg_namelen = u->addr->len; 1553 memcpy(msg->msg_name, u->addr->name, u->addr->len); 1554 } 1555 } 1556 1557 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, 1558 struct msghdr *msg, size_t size, 1559 int flags) 1560 { 1561 struct sock_iocb *siocb = kiocb_to_siocb(iocb); 1562 struct scm_cookie tmp_scm; 1563 struct sock *sk = sock->sk; 1564 struct unix_sock *u = unix_sk(sk); 1565 int noblock = flags & MSG_DONTWAIT; 1566 struct sk_buff *skb; 1567 int err; 1568 1569 err = -EOPNOTSUPP; 1570 if (flags&MSG_OOB) 1571 goto out; 1572 1573 msg->msg_namelen = 0; 1574 1575 mutex_lock(&u->readlock); 1576 1577 skb = skb_recv_datagram(sk, flags, noblock, &err); 1578 if (!skb) 1579 goto out_unlock; 1580 1581 wake_up_interruptible(&u->peer_wait); 1582 1583 if (msg->msg_name) 1584 unix_copy_addr(msg, skb->sk); 1585 1586 if (size > skb->len) 1587 size = skb->len; 1588 else if (size < skb->len) 1589 msg->msg_flags |= MSG_TRUNC; 1590 1591 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size); 1592 if (err) 1593 goto out_free; 1594 1595 if (!siocb->scm) { 1596 siocb->scm = &tmp_scm; 1597 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1598 } 1599 siocb->scm->creds = *UNIXCREDS(skb); 1600 unix_set_secdata(siocb->scm, skb); 1601 1602 if (!(flags & MSG_PEEK)) 1603 { 1604 if (UNIXCB(skb).fp) 1605 unix_detach_fds(siocb->scm, skb); 1606 } 1607 else 1608 { 
1609 /* It is questionable: on PEEK we could: 1610 - do not return fds - good, but too simple 8) 1611 - return fds, and do not return them on read (old strategy, 1612 apparently wrong) 1613 - clone fds (I chose it for now, it is the most universal 1614 solution) 1615 1616 POSIX 1003.1g does not actually define this clearly 1617 at all. POSIX 1003.1g doesn't define a lot of things 1618 clearly however! 1619 1620 */ 1621 if (UNIXCB(skb).fp) 1622 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1623 } 1624 err = size; 1625 1626 scm_recv(sock, msg, siocb->scm, flags); 1627 1628 out_free: 1629 skb_free_datagram(sk,skb); 1630 out_unlock: 1631 mutex_unlock(&u->readlock); 1632 out: 1633 return err; 1634 } 1635 1636 /* 1637 * Sleep until data has arrive. But check for races.. 1638 */ 1639 1640 static long unix_stream_data_wait(struct sock * sk, long timeo) 1641 { 1642 DEFINE_WAIT(wait); 1643 1644 unix_state_rlock(sk); 1645 1646 for (;;) { 1647 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1648 1649 if (!skb_queue_empty(&sk->sk_receive_queue) || 1650 sk->sk_err || 1651 (sk->sk_shutdown & RCV_SHUTDOWN) || 1652 signal_pending(current) || 1653 !timeo) 1654 break; 1655 1656 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1657 unix_state_runlock(sk); 1658 timeo = schedule_timeout(timeo); 1659 unix_state_rlock(sk); 1660 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1661 } 1662 1663 finish_wait(sk->sk_sleep, &wait); 1664 unix_state_runlock(sk); 1665 return timeo; 1666 } 1667 1668 1669 1670 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 1671 struct msghdr *msg, size_t size, 1672 int flags) 1673 { 1674 struct sock_iocb *siocb = kiocb_to_siocb(iocb); 1675 struct scm_cookie tmp_scm; 1676 struct sock *sk = sock->sk; 1677 struct unix_sock *u = unix_sk(sk); 1678 struct sockaddr_un *sunaddr=msg->msg_name; 1679 int copied = 0; 1680 int check_creds = 0; 1681 int target; 1682 int err = 0; 1683 long timeo; 1684 1685 err = -EINVAL; 1686 if 
(sk->sk_state != TCP_ESTABLISHED) 1687 goto out; 1688 1689 err = -EOPNOTSUPP; 1690 if (flags&MSG_OOB) 1691 goto out; 1692 1693 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); 1694 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); 1695 1696 msg->msg_namelen = 0; 1697 1698 /* Lock the socket to prevent queue disordering 1699 * while sleeps in memcpy_tomsg 1700 */ 1701 1702 if (!siocb->scm) { 1703 siocb->scm = &tmp_scm; 1704 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1705 } 1706 1707 mutex_lock(&u->readlock); 1708 1709 do 1710 { 1711 int chunk; 1712 struct sk_buff *skb; 1713 1714 skb = skb_dequeue(&sk->sk_receive_queue); 1715 if (skb==NULL) 1716 { 1717 if (copied >= target) 1718 break; 1719 1720 /* 1721 * POSIX 1003.1g mandates this order. 1722 */ 1723 1724 if ((err = sock_error(sk)) != 0) 1725 break; 1726 if (sk->sk_shutdown & RCV_SHUTDOWN) 1727 break; 1728 err = -EAGAIN; 1729 if (!timeo) 1730 break; 1731 mutex_unlock(&u->readlock); 1732 1733 timeo = unix_stream_data_wait(sk, timeo); 1734 1735 if (signal_pending(current)) { 1736 err = sock_intr_errno(timeo); 1737 goto out; 1738 } 1739 mutex_lock(&u->readlock); 1740 continue; 1741 } 1742 1743 if (check_creds) { 1744 /* Never glue messages from different writers */ 1745 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) { 1746 skb_queue_head(&sk->sk_receive_queue, skb); 1747 break; 1748 } 1749 } else { 1750 /* Copy credentials */ 1751 siocb->scm->creds = *UNIXCREDS(skb); 1752 check_creds = 1; 1753 } 1754 1755 /* Copy address just once */ 1756 if (sunaddr) 1757 { 1758 unix_copy_addr(msg, skb->sk); 1759 sunaddr = NULL; 1760 } 1761 1762 chunk = min_t(unsigned int, skb->len, size); 1763 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { 1764 skb_queue_head(&sk->sk_receive_queue, skb); 1765 if (copied == 0) 1766 copied = -EFAULT; 1767 break; 1768 } 1769 copied += chunk; 1770 size -= chunk; 1771 1772 /* Mark read part of skb as used */ 1773 if (!(flags & MSG_PEEK)) 1774 { 1775 skb_pull(skb, 
chunk); 1776 1777 if (UNIXCB(skb).fp) 1778 unix_detach_fds(siocb->scm, skb); 1779 1780 /* put the skb back if we didn't use it up.. */ 1781 if (skb->len) 1782 { 1783 skb_queue_head(&sk->sk_receive_queue, skb); 1784 break; 1785 } 1786 1787 kfree_skb(skb); 1788 1789 if (siocb->scm->fp) 1790 break; 1791 } 1792 else 1793 { 1794 /* It is questionable, see note in unix_dgram_recvmsg. 1795 */ 1796 if (UNIXCB(skb).fp) 1797 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1798 1799 /* put message back and return */ 1800 skb_queue_head(&sk->sk_receive_queue, skb); 1801 break; 1802 } 1803 } while (size); 1804 1805 mutex_unlock(&u->readlock); 1806 scm_recv(sock, msg, siocb->scm, flags); 1807 out: 1808 return copied ? : err; 1809 } 1810 1811 static int unix_shutdown(struct socket *sock, int mode) 1812 { 1813 struct sock *sk = sock->sk; 1814 struct sock *other; 1815 1816 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); 1817 1818 if (mode) { 1819 unix_state_wlock(sk); 1820 sk->sk_shutdown |= mode; 1821 other=unix_peer(sk); 1822 if (other) 1823 sock_hold(other); 1824 unix_state_wunlock(sk); 1825 sk->sk_state_change(sk); 1826 1827 if (other && 1828 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 1829 1830 int peer_mode = 0; 1831 1832 if (mode&RCV_SHUTDOWN) 1833 peer_mode |= SEND_SHUTDOWN; 1834 if (mode&SEND_SHUTDOWN) 1835 peer_mode |= RCV_SHUTDOWN; 1836 unix_state_wlock(other); 1837 other->sk_shutdown |= peer_mode; 1838 unix_state_wunlock(other); 1839 other->sk_state_change(other); 1840 read_lock(&other->sk_callback_lock); 1841 if (peer_mode == SHUTDOWN_MASK) 1842 sk_wake_async(other,1,POLL_HUP); 1843 else if (peer_mode & RCV_SHUTDOWN) 1844 sk_wake_async(other,1,POLL_IN); 1845 read_unlock(&other->sk_callback_lock); 1846 } 1847 if (other) 1848 sock_put(other); 1849 } 1850 return 0; 1851 } 1852 1853 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1854 { 1855 struct sock *sk = sock->sk; 1856 long amount=0; 1857 int err; 1858 1859 switch(cmd) 
1860 { 1861 case SIOCOUTQ: 1862 amount = atomic_read(&sk->sk_wmem_alloc); 1863 err = put_user(amount, (int __user *)arg); 1864 break; 1865 case SIOCINQ: 1866 { 1867 struct sk_buff *skb; 1868 1869 if (sk->sk_state == TCP_LISTEN) { 1870 err = -EINVAL; 1871 break; 1872 } 1873 1874 spin_lock(&sk->sk_receive_queue.lock); 1875 if (sk->sk_type == SOCK_STREAM || 1876 sk->sk_type == SOCK_SEQPACKET) { 1877 skb_queue_walk(&sk->sk_receive_queue, skb) 1878 amount += skb->len; 1879 } else { 1880 skb = skb_peek(&sk->sk_receive_queue); 1881 if (skb) 1882 amount=skb->len; 1883 } 1884 spin_unlock(&sk->sk_receive_queue.lock); 1885 err = put_user(amount, (int __user *)arg); 1886 break; 1887 } 1888 1889 default: 1890 err = -ENOIOCTLCMD; 1891 break; 1892 } 1893 return err; 1894 } 1895 1896 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) 1897 { 1898 struct sock *sk = sock->sk; 1899 unsigned int mask; 1900 1901 poll_wait(file, sk->sk_sleep, wait); 1902 mask = 0; 1903 1904 /* exceptional events? */ 1905 if (sk->sk_err) 1906 mask |= POLLERR; 1907 if (sk->sk_shutdown == SHUTDOWN_MASK) 1908 mask |= POLLHUP; 1909 if (sk->sk_shutdown & RCV_SHUTDOWN) 1910 mask |= POLLRDHUP; 1911 1912 /* readable? */ 1913 if (!skb_queue_empty(&sk->sk_receive_queue) || 1914 (sk->sk_shutdown & RCV_SHUTDOWN)) 1915 mask |= POLLIN | POLLRDNORM; 1916 1917 /* Connection-based need to check for termination and startup */ 1918 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) 1919 mask |= POLLHUP; 1920 1921 /* 1922 * we set writable also when the other side has shut down the 1923 * connection. This prevents stuck sockets. 
1924 */ 1925 if (unix_writable(sk)) 1926 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 1927 1928 return mask; 1929 } 1930 1931 1932 #ifdef CONFIG_PROC_FS 1933 static struct sock *unix_seq_idx(int *iter, loff_t pos) 1934 { 1935 loff_t off = 0; 1936 struct sock *s; 1937 1938 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { 1939 if (off == pos) 1940 return s; 1941 ++off; 1942 } 1943 return NULL; 1944 } 1945 1946 1947 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 1948 { 1949 spin_lock(&unix_table_lock); 1950 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); 1951 } 1952 1953 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1954 { 1955 ++*pos; 1956 1957 if (v == (void *)1) 1958 return first_unix_socket(seq->private); 1959 return next_unix_socket(seq->private, v); 1960 } 1961 1962 static void unix_seq_stop(struct seq_file *seq, void *v) 1963 { 1964 spin_unlock(&unix_table_lock); 1965 } 1966 1967 static int unix_seq_show(struct seq_file *seq, void *v) 1968 { 1969 1970 if (v == (void *)1) 1971 seq_puts(seq, "Num RefCount Protocol Flags Type St " 1972 "Inode Path\n"); 1973 else { 1974 struct sock *s = v; 1975 struct unix_sock *u = unix_sk(s); 1976 unix_state_rlock(s); 1977 1978 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu", 1979 s, 1980 atomic_read(&s->sk_refcnt), 1981 0, 1982 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 1983 s->sk_type, 1984 s->sk_socket ? 1985 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 1986 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 1987 sock_i_ino(s)); 1988 1989 if (u->addr) { 1990 int i, len; 1991 seq_putc(seq, ' '); 1992 1993 i = 0; 1994 len = u->addr->len - sizeof(short); 1995 if (!UNIX_ABSTRACT(s)) 1996 len--; 1997 else { 1998 seq_putc(seq, '@'); 1999 i++; 2000 } 2001 for ( ; i < len; i++) 2002 seq_putc(seq, u->addr->name->sun_path[i]); 2003 } 2004 unix_state_runlock(s); 2005 seq_putc(seq, '\n'); 2006 } 2007 2008 return 0; 2009 } 2010 2011 static struct seq_operations unix_seq_ops = { 2012 .start = unix_seq_start, 2013 .next = unix_seq_next, 2014 .stop = unix_seq_stop, 2015 .show = unix_seq_show, 2016 }; 2017 2018 2019 static int unix_seq_open(struct inode *inode, struct file *file) 2020 { 2021 struct seq_file *seq; 2022 int rc = -ENOMEM; 2023 int *iter = kmalloc(sizeof(int), GFP_KERNEL); 2024 2025 if (!iter) 2026 goto out; 2027 2028 rc = seq_open(file, &unix_seq_ops); 2029 if (rc) 2030 goto out_kfree; 2031 2032 seq = file->private_data; 2033 seq->private = iter; 2034 *iter = 0; 2035 out: 2036 return rc; 2037 out_kfree: 2038 kfree(iter); 2039 goto out; 2040 } 2041 2042 static const struct file_operations unix_seq_fops = { 2043 .owner = THIS_MODULE, 2044 .open = unix_seq_open, 2045 .read = seq_read, 2046 .llseek = seq_lseek, 2047 .release = seq_release_private, 2048 }; 2049 2050 #endif 2051 2052 static struct net_proto_family unix_family_ops = { 2053 .family = PF_UNIX, 2054 .create = unix_create, 2055 .owner = THIS_MODULE, 2056 }; 2057 2058 static int __init af_unix_init(void) 2059 { 2060 int rc = -1; 2061 struct sk_buff *dummy_skb; 2062 2063 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); 2064 2065 rc = proto_register(&unix_proto, 1); 2066 if (rc != 0) { 2067 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n", 2068 __FUNCTION__); 2069 goto out; 2070 } 2071 2072 sock_register(&unix_family_ops); 2073 #ifdef CONFIG_PROC_FS 2074 proc_net_fops_create("unix", 0, &unix_seq_fops); 2075 #endif 2076 unix_sysctl_register(); 2077 out: 
2078 return rc; 2079 } 2080 2081 static void __exit af_unix_exit(void) 2082 { 2083 sock_unregister(PF_UNIX); 2084 unix_sysctl_unregister(); 2085 proc_net_remove("unix"); 2086 proto_unregister(&unix_proto); 2087 } 2088 2089 module_init(af_unix_init); 2090 module_exit(af_unix_exit); 2091 2092 MODULE_LICENSE("GPL"); 2093 MODULE_ALIAS_NETPROTO(PF_UNIX); 2094