xref: /openbmc/linux/net/unix/af_unix.c (revision 9ce7677c)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan.cox@linux.org>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *		Linus Torvalds	:	Assorted bug cures.
15  *		Niibe Yutaka	:	async I/O support.
16  *		Carsten Paeth	:	PF_UNIX check, address fixes.
17  *		Alan Cox	:	Limit size of allocated blocks.
18  *		Alan Cox	:	Fixed the stupid socketpair bug.
19  *		Alan Cox	:	BSD compatibility fine tuning.
20  *		Alan Cox	:	Fixed a bug in connect when interrupted.
21  *		Alan Cox	:	Sorted out a proper draft version of
22  *					file descriptor passing hacked up from
23  *					Mike Shaver's work.
24  *		Marty Leisner	:	Fixes to fd passing
25  *		Nick Nevin	:	recvmsg bugfix.
26  *		Alan Cox	:	Started proper garbage collector
27  *		Heiko EiBfeldt	:	Missing verify_area check
28  *		Alan Cox	:	Started POSIXisms
29  *		Andreas Schwab	:	Replace inode by dentry for proper
30  *					reference counting
31  *		Kirk Petersen	:	Made this a module
32  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
33  *					Lots of bug fixes.
34  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
35  *					by the above two patches.
36  *	     Andrea Arcangeli	:	If possible we block in connect(2)
37  *					if the max backlog of the listen socket
38  *					has been reached. This won't break
39  *					old apps and it will avoid a huge number
40  *					of hashed socks (this is for unix_gc()
41  *					performance reasons).
42  *					Security fix that limits the max
43  *					number of socks to 2*max_files and
44  *					the number of skb queueable in the
45  *					dgram receiver.
46  *		Artur Skawina   :	Hash function optimizations
47  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
48  *	      Malcolm Beattie   :	Set peercred for socketpair
49  *	     Michal Ostrowski   :       Module initialization cleanup.
50  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
51  *	     				the core infrastructure is doing that
52  *	     				for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *	[TO FIX]
58  *	ECONNREFUSED is not returned from one end of a connected() socket to the
59  *		other the moment one end closes.
60  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *	[NOT TO FIX]
63  *	accept() returns a path name even if the connecting socket has closed
64  *		in the meantime (BSD loses the path and gives up).
65  *	accept() returns 0 length path for an unbound connector. BSD returns 16
66  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *	BSD af_unix apparently has connect forgetting to block properly.
69  *		(need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *	Bug fixes and improvements.
73  *		- client shutdown killed server socket.
74  *		- removed all useless cli/sti pairs.
75  *
76  *	Semantic changes/extensions.
77  *		- generic control message passing.
78  *		- SCM_CREDENTIALS control message.
79  *		- "Abstract" (not FS based) socket bindings.
80  *		  Abstract names are sequences of bytes (not zero terminated)
81  *		  started by 0, so that this name space does not intersect
82  *		  with BSD names.
83  */
84 
85 #include <linux/module.h>
86 #include <linux/config.h>
87 #include <linux/kernel.h>
88 #include <linux/signal.h>
89 #include <linux/sched.h>
90 #include <linux/errno.h>
91 #include <linux/string.h>
92 #include <linux/stat.h>
93 #include <linux/dcache.h>
94 #include <linux/namei.h>
95 #include <linux/socket.h>
96 #include <linux/un.h>
97 #include <linux/fcntl.h>
98 #include <linux/termios.h>
99 #include <linux/sockios.h>
100 #include <linux/net.h>
101 #include <linux/in.h>
102 #include <linux/fs.h>
103 #include <linux/slab.h>
104 #include <asm/uaccess.h>
105 #include <linux/skbuff.h>
106 #include <linux/netdevice.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/smp_lock.h>
116 #include <linux/rtnetlink.h>
117 #include <linux/mount.h>
118 #include <net/checksum.h>
119 #include <linux/security.h>
120 
121 int sysctl_unix_max_dgram_qlen = 10;
122 
123 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
124 DEFINE_RWLOCK(unix_table_lock);
125 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126 
127 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
128 
129 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
130 
131 /*
132  *  SMP locking strategy:
133  *    hash table is protected with rwlock unix_table_lock
134  *    each socket state is protected by separate rwlock.
135  */
136 
137 static inline unsigned unix_hash_fold(unsigned hash)
138 {
139 	hash ^= hash>>16;
140 	hash ^= hash>>8;
141 	return hash&(UNIX_HASH_SIZE-1);
142 }
143 
/*
 * Peer pointer of a unix socket.  Expands to an lvalue, so it is used both
 * for reading and for assigning the peer.
 */
#define unix_peer(sk)	(unix_sk(sk)->peer)
145 
/* Return non-zero when the peer recorded in osk is sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
150 
/*
 * Decide whether sk is allowed to send to osk: either osk has no peer at
 * all, or its peer is sk itself.  Returns 1 when allowed, 0 otherwise.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	if (!unix_peer(osk))
		return 1;

	return unix_our_peer(sk, osk);
}
155 
156 static struct sock *unix_peer_get(struct sock *s)
157 {
158 	struct sock *peer;
159 
160 	unix_state_rlock(s);
161 	peer = unix_peer(s);
162 	if (peer)
163 		sock_hold(peer);
164 	unix_state_runlock(s);
165 	return peer;
166 }
167 
168 static inline void unix_release_addr(struct unix_address *addr)
169 {
170 	if (atomic_dec_and_test(&addr->refcnt))
171 		kfree(addr);
172 }
173 
174 /*
175  *	Check unix socket name:
176  *		- should be not zero length.
177  *	        - if started by not zero, should be NULL terminated (FS object)
178  *		- if started by zero, it is abstract name.
179  */
180 
181 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
182 {
183 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
184 		return -EINVAL;
185 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
186 		return -EINVAL;
187 	if (sunaddr->sun_path[0]) {
188 		/*
189 		 * This may look like an off by one error but it is a bit more
190 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
191 		 * sun_path[108] doesnt as such exist.  However in kernel space
192 		 * we are guaranteed that it is a valid memory location in our
193 		 * kernel address buffer.
194 		 */
195 		((char *)sunaddr)[len]=0;
196 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
197 		return len;
198 	}
199 
200 	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
201 	return len;
202 }
203 
/*
 * Unhash sk from whichever chain it sits on.  Lock-free variant: the
 * callers in this file hold unix_table_lock for writing around it.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
208 
/*
 * Add sk to the given hash chain.  The socket must not be hashed already
 * (checked with BUG_TRAP).  Lock-free variant: the callers in this file
 * hold unix_table_lock for writing around it.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));

	sk_add_node(sk, list);
}
214 
215 static inline void unix_remove_socket(struct sock *sk)
216 {
217 	write_lock(&unix_table_lock);
218 	__unix_remove_socket(sk);
219 	write_unlock(&unix_table_lock);
220 }
221 
222 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
223 {
224 	write_lock(&unix_table_lock);
225 	__unix_insert_socket(list, sk);
226 	write_unlock(&unix_table_lock);
227 }
228 
229 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
230 					      int len, int type, unsigned hash)
231 {
232 	struct sock *s;
233 	struct hlist_node *node;
234 
235 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
236 		struct unix_sock *u = unix_sk(s);
237 
238 		if (u->addr->len == len &&
239 		    !memcmp(u->addr->name, sunname, len))
240 			goto found;
241 	}
242 	s = NULL;
243 found:
244 	return s;
245 }
246 
247 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
248 						   int len, int type,
249 						   unsigned hash)
250 {
251 	struct sock *s;
252 
253 	read_lock(&unix_table_lock);
254 	s = __unix_find_socket_byname(sunname, len, type, hash);
255 	if (s)
256 		sock_hold(s);
257 	read_unlock(&unix_table_lock);
258 	return s;
259 }
260 
261 static struct sock *unix_find_socket_byinode(struct inode *i)
262 {
263 	struct sock *s;
264 	struct hlist_node *node;
265 
266 	read_lock(&unix_table_lock);
267 	sk_for_each(s, node,
268 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
269 		struct dentry *dentry = unix_sk(s)->dentry;
270 
271 		if(dentry && dentry->d_inode == i)
272 		{
273 			sock_hold(s);
274 			goto found;
275 		}
276 	}
277 	s = NULL;
278 found:
279 	read_unlock(&unix_table_lock);
280 	return s;
281 }
282 
283 static inline int unix_writable(struct sock *sk)
284 {
285 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
286 }
287 
288 static void unix_write_space(struct sock *sk)
289 {
290 	read_lock(&sk->sk_callback_lock);
291 	if (unix_writable(sk)) {
292 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
293 			wake_up_interruptible(sk->sk_sleep);
294 		sk_wake_async(sk, 2, POLL_OUT);
295 	}
296 	read_unlock(&sk->sk_callback_lock);
297 }
298 
299 /* When dgram socket disconnects (or changes its peer), we clear its receive
300  * queue of packets arrived from previous peer. First, it allows to do
301  * flow control based only on wmem_alloc; second, sk connected to peer
302  * may receive messages only from that peer. */
303 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
304 {
305 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
306 		skb_queue_purge(&sk->sk_receive_queue);
307 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
308 
309 		/* If one link of bidirectional dgram pipe is disconnected,
310 		 * we signal error. Messages are lost. Do not make this,
311 		 * when peer was not connected to us.
312 		 */
313 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
314 			other->sk_err = ECONNRESET;
315 			other->sk_error_report(other);
316 		}
317 	}
318 }
319 
320 static void unix_sock_destructor(struct sock *sk)
321 {
322 	struct unix_sock *u = unix_sk(sk);
323 
324 	skb_queue_purge(&sk->sk_receive_queue);
325 
326 	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
327 	BUG_TRAP(sk_unhashed(sk));
328 	BUG_TRAP(!sk->sk_socket);
329 	if (!sock_flag(sk, SOCK_DEAD)) {
330 		printk("Attempt to release alive unix socket: %p\n", sk);
331 		return;
332 	}
333 
334 	if (u->addr)
335 		unix_release_addr(u->addr);
336 
337 	atomic_dec(&unix_nr_socks);
338 #ifdef UNIX_REFCNT_DEBUG
339 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
340 #endif
341 }
342 
343 static int unix_release_sock (struct sock *sk, int embrion)
344 {
345 	struct unix_sock *u = unix_sk(sk);
346 	struct dentry *dentry;
347 	struct vfsmount *mnt;
348 	struct sock *skpair;
349 	struct sk_buff *skb;
350 	int state;
351 
352 	unix_remove_socket(sk);
353 
354 	/* Clear state */
355 	unix_state_wlock(sk);
356 	sock_orphan(sk);
357 	sk->sk_shutdown = SHUTDOWN_MASK;
358 	dentry	     = u->dentry;
359 	u->dentry    = NULL;
360 	mnt	     = u->mnt;
361 	u->mnt	     = NULL;
362 	state = sk->sk_state;
363 	sk->sk_state = TCP_CLOSE;
364 	unix_state_wunlock(sk);
365 
366 	wake_up_interruptible_all(&u->peer_wait);
367 
368 	skpair=unix_peer(sk);
369 
370 	if (skpair!=NULL) {
371 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
372 			unix_state_wlock(skpair);
373 			/* No more writes */
374 			skpair->sk_shutdown = SHUTDOWN_MASK;
375 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
376 				skpair->sk_err = ECONNRESET;
377 			unix_state_wunlock(skpair);
378 			skpair->sk_state_change(skpair);
379 			read_lock(&skpair->sk_callback_lock);
380 			sk_wake_async(skpair,1,POLL_HUP);
381 			read_unlock(&skpair->sk_callback_lock);
382 		}
383 		sock_put(skpair); /* It may now die */
384 		unix_peer(sk) = NULL;
385 	}
386 
387 	/* Try to flush out this socket. Throw out buffers at least */
388 
389 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
390 		if (state==TCP_LISTEN)
391 			unix_release_sock(skb->sk, 1);
392 		/* passed fds are erased in the kfree_skb hook	      */
393 		kfree_skb(skb);
394 	}
395 
396 	if (dentry) {
397 		dput(dentry);
398 		mntput(mnt);
399 	}
400 
401 	sock_put(sk);
402 
403 	/* ---- Socket is dead now and most probably destroyed ---- */
404 
405 	/*
406 	 * Fixme: BSD difference: In BSD all sockets connected to use get
407 	 *	  ECONNRESET and we die on the spot. In Linux we behave
408 	 *	  like files and pipes do and wait for the last
409 	 *	  dereference.
410 	 *
411 	 * Can't we simply set sock->err?
412 	 *
413 	 *	  What the above comment does talk about? --ANK(980817)
414 	 */
415 
416 	if (atomic_read(&unix_tot_inflight))
417 		unix_gc();		/* Garbage collect fds */
418 
419 	return 0;
420 }
421 
422 static int unix_listen(struct socket *sock, int backlog)
423 {
424 	int err;
425 	struct sock *sk = sock->sk;
426 	struct unix_sock *u = unix_sk(sk);
427 
428 	err = -EOPNOTSUPP;
429 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
430 		goto out;			/* Only stream/seqpacket sockets accept */
431 	err = -EINVAL;
432 	if (!u->addr)
433 		goto out;			/* No listens on an unbound socket */
434 	unix_state_wlock(sk);
435 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
436 		goto out_unlock;
437 	if (backlog > sk->sk_max_ack_backlog)
438 		wake_up_interruptible_all(&u->peer_wait);
439 	sk->sk_max_ack_backlog	= backlog;
440 	sk->sk_state		= TCP_LISTEN;
441 	/* set credentials so connect can copy them */
442 	sk->sk_peercred.pid	= current->tgid;
443 	sk->sk_peercred.uid	= current->euid;
444 	sk->sk_peercred.gid	= current->egid;
445 	err = 0;
446 
447 out_unlock:
448 	unix_state_wunlock(sk);
449 out:
450 	return err;
451 }
452 
453 static int unix_release(struct socket *);
454 static int unix_bind(struct socket *, struct sockaddr *, int);
455 static int unix_stream_connect(struct socket *, struct sockaddr *,
456 			       int addr_len, int flags);
457 static int unix_socketpair(struct socket *, struct socket *);
458 static int unix_accept(struct socket *, struct socket *, int);
459 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
460 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
461 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
462 static int unix_shutdown(struct socket *, int);
463 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
464 			       struct msghdr *, size_t);
465 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
466 			       struct msghdr *, size_t, int);
467 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
468 			      struct msghdr *, size_t);
469 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
470 			      struct msghdr *, size_t, int);
471 static int unix_dgram_connect(struct socket *, struct sockaddr *,
472 			      int, int);
473 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
474 				  struct msghdr *, size_t);
475 
476 static struct proto_ops unix_stream_ops = {
477 	.family =	PF_UNIX,
478 	.owner =	THIS_MODULE,
479 	.release =	unix_release,
480 	.bind =		unix_bind,
481 	.connect =	unix_stream_connect,
482 	.socketpair =	unix_socketpair,
483 	.accept =	unix_accept,
484 	.getname =	unix_getname,
485 	.poll =		unix_poll,
486 	.ioctl =	unix_ioctl,
487 	.listen =	unix_listen,
488 	.shutdown =	unix_shutdown,
489 	.setsockopt =	sock_no_setsockopt,
490 	.getsockopt =	sock_no_getsockopt,
491 	.sendmsg =	unix_stream_sendmsg,
492 	.recvmsg =	unix_stream_recvmsg,
493 	.mmap =		sock_no_mmap,
494 	.sendpage =	sock_no_sendpage,
495 };
496 
497 static struct proto_ops unix_dgram_ops = {
498 	.family =	PF_UNIX,
499 	.owner =	THIS_MODULE,
500 	.release =	unix_release,
501 	.bind =		unix_bind,
502 	.connect =	unix_dgram_connect,
503 	.socketpair =	unix_socketpair,
504 	.accept =	sock_no_accept,
505 	.getname =	unix_getname,
506 	.poll =		datagram_poll,
507 	.ioctl =	unix_ioctl,
508 	.listen =	sock_no_listen,
509 	.shutdown =	unix_shutdown,
510 	.setsockopt =	sock_no_setsockopt,
511 	.getsockopt =	sock_no_getsockopt,
512 	.sendmsg =	unix_dgram_sendmsg,
513 	.recvmsg =	unix_dgram_recvmsg,
514 	.mmap =		sock_no_mmap,
515 	.sendpage =	sock_no_sendpage,
516 };
517 
518 static struct proto_ops unix_seqpacket_ops = {
519 	.family =	PF_UNIX,
520 	.owner =	THIS_MODULE,
521 	.release =	unix_release,
522 	.bind =		unix_bind,
523 	.connect =	unix_stream_connect,
524 	.socketpair =	unix_socketpair,
525 	.accept =	unix_accept,
526 	.getname =	unix_getname,
527 	.poll =		datagram_poll,
528 	.ioctl =	unix_ioctl,
529 	.listen =	unix_listen,
530 	.shutdown =	unix_shutdown,
531 	.setsockopt =	sock_no_setsockopt,
532 	.getsockopt =	sock_no_getsockopt,
533 	.sendmsg =	unix_seqpacket_sendmsg,
534 	.recvmsg =	unix_dgram_recvmsg,
535 	.mmap =		sock_no_mmap,
536 	.sendpage =	sock_no_sendpage,
537 };
538 
539 static struct proto unix_proto = {
540 	.name	  = "UNIX",
541 	.owner	  = THIS_MODULE,
542 	.obj_size = sizeof(struct unix_sock),
543 };
544 
545 static struct sock * unix_create1(struct socket *sock)
546 {
547 	struct sock *sk = NULL;
548 	struct unix_sock *u;
549 
550 	if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
551 		goto out;
552 
553 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
554 	if (!sk)
555 		goto out;
556 
557 	atomic_inc(&unix_nr_socks);
558 
559 	sock_init_data(sock,sk);
560 
561 	sk->sk_write_space	= unix_write_space;
562 	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
563 	sk->sk_destruct		= unix_sock_destructor;
564 	u	  = unix_sk(sk);
565 	u->dentry = NULL;
566 	u->mnt	  = NULL;
567 	rwlock_init(&u->lock);
568 	atomic_set(&u->inflight, sock ? 0 : -1);
569 	init_MUTEX(&u->readsem); /* single task reading lock */
570 	init_waitqueue_head(&u->peer_wait);
571 	unix_insert_socket(unix_sockets_unbound, sk);
572 out:
573 	return sk;
574 }
575 
576 static int unix_create(struct socket *sock, int protocol)
577 {
578 	if (protocol && protocol != PF_UNIX)
579 		return -EPROTONOSUPPORT;
580 
581 	sock->state = SS_UNCONNECTED;
582 
583 	switch (sock->type) {
584 	case SOCK_STREAM:
585 		sock->ops = &unix_stream_ops;
586 		break;
587 		/*
588 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
589 		 *	nothing uses it.
590 		 */
591 	case SOCK_RAW:
592 		sock->type=SOCK_DGRAM;
593 	case SOCK_DGRAM:
594 		sock->ops = &unix_dgram_ops;
595 		break;
596 	case SOCK_SEQPACKET:
597 		sock->ops = &unix_seqpacket_ops;
598 		break;
599 	default:
600 		return -ESOCKTNOSUPPORT;
601 	}
602 
603 	return unix_create1(sock) ? 0 : -ENOMEM;
604 }
605 
606 static int unix_release(struct socket *sock)
607 {
608 	struct sock *sk = sock->sk;
609 
610 	if (!sk)
611 		return 0;
612 
613 	sock->sk = NULL;
614 
615 	return unix_release_sock (sk, 0);
616 }
617 
618 static int unix_autobind(struct socket *sock)
619 {
620 	struct sock *sk = sock->sk;
621 	struct unix_sock *u = unix_sk(sk);
622 	static u32 ordernum = 1;
623 	struct unix_address * addr;
624 	int err;
625 
626 	down(&u->readsem);
627 
628 	err = 0;
629 	if (u->addr)
630 		goto out;
631 
632 	err = -ENOMEM;
633 	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
634 	if (!addr)
635 		goto out;
636 
637 	memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
638 	addr->name->sun_family = AF_UNIX;
639 	atomic_set(&addr->refcnt, 1);
640 
641 retry:
642 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
643 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
644 
645 	write_lock(&unix_table_lock);
646 	ordernum = (ordernum+1)&0xFFFFF;
647 
648 	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
649 				      addr->hash)) {
650 		write_unlock(&unix_table_lock);
651 		/* Sanity yield. It is unusual case, but yet... */
652 		if (!(ordernum&0xFF))
653 			yield();
654 		goto retry;
655 	}
656 	addr->hash ^= sk->sk_type;
657 
658 	__unix_remove_socket(sk);
659 	u->addr = addr;
660 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
661 	write_unlock(&unix_table_lock);
662 	err = 0;
663 
664 out:	up(&u->readsem);
665 	return err;
666 }
667 
/*
 * Look up the socket a name refers to.
 *
 * Filesystem names are resolved through the VFS: the caller needs write
 * permission on the object, which must be a socket inode with a unix
 * socket bound to it.  Abstract names are looked up in the hash table.
 *
 * On success the socket is returned with a reference held and *error is
 * left untouched.  On failure NULL is returned and *error is set to the
 * path lookup / permission error, to -ECONNREFUSED when nothing is bound
 * to the name, or to -EPROTOTYPE when a filesystem socket of a different
 * type was found.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct nameidata nd;
	struct sock *found;
	int err;

	if (!sunname->sun_path[0]) {
		/* Abstract name. */
		struct dentry *dentry;

		found = unix_find_socket_byname(sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	/* Filesystem name. */
	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;

	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(nd.dentry->d_inode);
	if (!found)
		goto put_fail;

	if (found->sk_type != type) {
		/* Wrong kind of socket: drop the path and our reference. */
		path_release(&nd);
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}

	touch_atime(nd.mnt, nd.dentry);
	path_release(&nd);
	return found;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
719 
720 
721 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
722 {
723 	struct sock *sk = sock->sk;
724 	struct unix_sock *u = unix_sk(sk);
725 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
726 	struct dentry * dentry = NULL;
727 	struct nameidata nd;
728 	int err;
729 	unsigned hash;
730 	struct unix_address *addr;
731 	struct hlist_head *list;
732 
733 	err = -EINVAL;
734 	if (sunaddr->sun_family != AF_UNIX)
735 		goto out;
736 
737 	if (addr_len==sizeof(short)) {
738 		err = unix_autobind(sock);
739 		goto out;
740 	}
741 
742 	err = unix_mkname(sunaddr, addr_len, &hash);
743 	if (err < 0)
744 		goto out;
745 	addr_len = err;
746 
747 	down(&u->readsem);
748 
749 	err = -EINVAL;
750 	if (u->addr)
751 		goto out_up;
752 
753 	err = -ENOMEM;
754 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
755 	if (!addr)
756 		goto out_up;
757 
758 	memcpy(addr->name, sunaddr, addr_len);
759 	addr->len = addr_len;
760 	addr->hash = hash ^ sk->sk_type;
761 	atomic_set(&addr->refcnt, 1);
762 
763 	if (sunaddr->sun_path[0]) {
764 		unsigned int mode;
765 		err = 0;
766 		/*
767 		 * Get the parent directory, calculate the hash for last
768 		 * component.
769 		 */
770 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
771 		if (err)
772 			goto out_mknod_parent;
773 
774 		dentry = lookup_create(&nd, 0);
775 		err = PTR_ERR(dentry);
776 		if (IS_ERR(dentry))
777 			goto out_mknod_unlock;
778 
779 		/*
780 		 * All right, let's create it.
781 		 */
782 		mode = S_IFSOCK |
783 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
784 		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
785 		if (err)
786 			goto out_mknod_dput;
787 		up(&nd.dentry->d_inode->i_sem);
788 		dput(nd.dentry);
789 		nd.dentry = dentry;
790 
791 		addr->hash = UNIX_HASH_SIZE;
792 	}
793 
794 	write_lock(&unix_table_lock);
795 
796 	if (!sunaddr->sun_path[0]) {
797 		err = -EADDRINUSE;
798 		if (__unix_find_socket_byname(sunaddr, addr_len,
799 					      sk->sk_type, hash)) {
800 			unix_release_addr(addr);
801 			goto out_unlock;
802 		}
803 
804 		list = &unix_socket_table[addr->hash];
805 	} else {
806 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
807 		u->dentry = nd.dentry;
808 		u->mnt    = nd.mnt;
809 	}
810 
811 	err = 0;
812 	__unix_remove_socket(sk);
813 	u->addr = addr;
814 	__unix_insert_socket(list, sk);
815 
816 out_unlock:
817 	write_unlock(&unix_table_lock);
818 out_up:
819 	up(&u->readsem);
820 out:
821 	return err;
822 
823 out_mknod_dput:
824 	dput(dentry);
825 out_mknod_unlock:
826 	up(&nd.dentry->d_inode->i_sem);
827 	path_release(&nd);
828 out_mknod_parent:
829 	if (err==-EEXIST)
830 		err=-EADDRINUSE;
831 	unix_release_addr(addr);
832 	goto out_up;
833 }
834 
835 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
836 			      int alen, int flags)
837 {
838 	struct sock *sk = sock->sk;
839 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
840 	struct sock *other;
841 	unsigned hash;
842 	int err;
843 
844 	if (addr->sa_family != AF_UNSPEC) {
845 		err = unix_mkname(sunaddr, alen, &hash);
846 		if (err < 0)
847 			goto out;
848 		alen = err;
849 
850 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
851 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
852 			goto out;
853 
854 		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
855 		if (!other)
856 			goto out;
857 
858 		unix_state_wlock(sk);
859 
860 		err = -EPERM;
861 		if (!unix_may_send(sk, other))
862 			goto out_unlock;
863 
864 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
865 		if (err)
866 			goto out_unlock;
867 
868 	} else {
869 		/*
870 		 *	1003.1g breaking connected state with AF_UNSPEC
871 		 */
872 		other = NULL;
873 		unix_state_wlock(sk);
874 	}
875 
876 	/*
877 	 * If it was connected, reconnect.
878 	 */
879 	if (unix_peer(sk)) {
880 		struct sock *old_peer = unix_peer(sk);
881 		unix_peer(sk)=other;
882 		unix_state_wunlock(sk);
883 
884 		if (other != old_peer)
885 			unix_dgram_disconnected(sk, old_peer);
886 		sock_put(old_peer);
887 	} else {
888 		unix_peer(sk)=other;
889 		unix_state_wunlock(sk);
890 	}
891  	return 0;
892 
893 out_unlock:
894 	unix_state_wunlock(sk);
895 	sock_put(other);
896 out:
897 	return err;
898 }
899 
900 static long unix_wait_for_peer(struct sock *other, long timeo)
901 {
902 	struct unix_sock *u = unix_sk(other);
903 	int sched;
904 	DEFINE_WAIT(wait);
905 
906 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
907 
908 	sched = !sock_flag(other, SOCK_DEAD) &&
909 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
910 		(skb_queue_len(&other->sk_receive_queue) >
911 		 other->sk_max_ack_backlog);
912 
913 	unix_state_runlock(other);
914 
915 	if (sched)
916 		timeo = schedule_timeout(timeo);
917 
918 	finish_wait(&u->peer_wait, &wait);
919 	return timeo;
920 }
921 
/*
 * connect() for stream and seqpacket sockets.
 *
 * A fresh sock (the future accepted end) and a one-byte skb are allocated
 * up front.  The listening socket is then looked up and, once both states
 * are latched, the new sock is wired to sk as its peer and queued on the
 * listener's receive queue by means of the skb.
 *
 * Returns 0 on success.  Errors include name validation / autobind /
 * lookup failures, -ENOMEM, -ECONNREFUSED when the target is not
 * listening, -EAGAIN when the backlog is full and no timeout is allowed,
 * the sock_intr_errno() value when a signal interrupts the wait,
 * -EISCONN / -EINVAL depending on our own state, and whatever the
 * security hook returns.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct unix_sock *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	long timeo;
	int own_state;
	int err;

	/* Nothing has been acquired yet, so plain returns are fine here. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		return err;
	addr_len = err;

	/* Credential passing needs a name on our side. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			return err;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate all resources first.  Doing it after the states are
	 * locked would force us to recheck everything anyway.
	 */
	err = -ENOMEM;

	/* The sock that becomes the accepted end of the connection. */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* The skb that carries newsk to the listening socket. */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find the listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch the state of the peer. */
	unix_state_rlock(other);

	/* The VFS apparently overslept the death of the socket: retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		/* Backlog full: fail at once or wait for room. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* This call drops the read lock on other. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*
	 * Latch our own state.
	 *
	 * This is the tricky part: we need our write lock while still
	 * holding the lock on the peer, which could deadlock.  Connecting
	 * to ourselves and simultaneous connection attempts are ruled out
	 * by looking at the socket state first: other is TCP_LISTEN, and a
	 * listening sk is rejected before we try to take its lock.
	 *
	 * The state has to be rechecked once the lock is held.
	 */
	own_state = sk->sk_state;

	if (own_state == TCP_ESTABLISHED) {
		/* Socket is already connected. */
		err = -EISCONN;
		goto out_unlock;
	}
	if (own_state != TCP_CLOSE) {
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock(sk);

	if (sk->sk_state != own_state) {
		/* State changed under us: start over. */
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open: quickly fill in all the necessary fields. */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	newsk->sk_peercred.pid = current->tgid;
	newsk->sk_peercred.uid = current->euid;
	newsk->sk_peercred.gid = current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep = &newu->peer_wait;
	otheru = unix_sk(other);

	/* The new sock shares the address of the listening sock. */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry = dget(otheru->dentry);
		newu->mnt = mntget(otheru->mnt);
	}

	/* Our peer credentials are those recorded by the listener. */
	sk->sk_peercred = other->sk_peercred;

	sock_hold(newsk);
	unix_peer(sk) = newsk;
	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;

	unix_state_wunlock(sk);

	/* Hand the new sock over to the listening socket. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/*
	 * Undo the artificially decreased inflight count now that the
	 * embryo is installed on the listening socket.
	 */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1098 
1099 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1100 {
1101 	struct sock *ska=socka->sk, *skb = sockb->sk;
1102 
1103 	/* Join our sockets back to back */
1104 	sock_hold(ska);
1105 	sock_hold(skb);
1106 	unix_peer(ska)=skb;
1107 	unix_peer(skb)=ska;
1108 	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1109 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1110 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1111 
1112 	if (ska->sk_type != SOCK_DGRAM) {
1113 		ska->sk_state = TCP_ESTABLISHED;
1114 		skb->sk_state = TCP_ESTABLISHED;
1115 		socka->state  = SS_CONNECTED;
1116 		sockb->state  = SS_CONNECTED;
1117 	}
1118 	return 0;
1119 }
1120 
1121 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1122 {
1123 	struct sock *sk = sock->sk;
1124 	struct sock *tsk;
1125 	struct sk_buff *skb;
1126 	int err;
1127 
1128 	err = -EOPNOTSUPP;
1129 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1130 		goto out;
1131 
1132 	err = -EINVAL;
1133 	if (sk->sk_state != TCP_LISTEN)
1134 		goto out;
1135 
1136 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1137 	 * so that no locks are necessary.
1138 	 */
1139 
1140 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1141 	if (!skb) {
1142 		/* This means receive shutdown. */
1143 		if (err == 0)
1144 			err = -EINVAL;
1145 		goto out;
1146 	}
1147 
1148 	tsk = skb->sk;
1149 	skb_free_datagram(sk, skb);
1150 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1151 
1152 	/* attach accepted sock to socket */
1153 	unix_state_wlock(tsk);
1154 	newsock->state = SS_CONNECTED;
1155 	sock_graft(tsk, newsock);
1156 	unix_state_wunlock(tsk);
1157 	return 0;
1158 
1159 out:
1160 	return err;
1161 }
1162 
1163 
1164 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1165 {
1166 	struct sock *sk = sock->sk;
1167 	struct unix_sock *u;
1168 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1169 	int err = 0;
1170 
1171 	if (peer) {
1172 		sk = unix_peer_get(sk);
1173 
1174 		err = -ENOTCONN;
1175 		if (!sk)
1176 			goto out;
1177 		err = 0;
1178 	} else {
1179 		sock_hold(sk);
1180 	}
1181 
1182 	u = unix_sk(sk);
1183 	unix_state_rlock(sk);
1184 	if (!u->addr) {
1185 		sunaddr->sun_family = AF_UNIX;
1186 		sunaddr->sun_path[0] = 0;
1187 		*uaddr_len = sizeof(short);
1188 	} else {
1189 		struct unix_address *addr = u->addr;
1190 
1191 		*uaddr_len = addr->len;
1192 		memcpy(sunaddr, addr->name, *uaddr_len);
1193 	}
1194 	unix_state_runlock(sk);
1195 	sock_put(sk);
1196 out:
1197 	return err;
1198 }
1199 
1200 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1201 {
1202 	int i;
1203 
1204 	scm->fp = UNIXCB(skb).fp;
1205 	skb->destructor = sock_wfree;
1206 	UNIXCB(skb).fp = NULL;
1207 
1208 	for (i=scm->fp->count-1; i>=0; i--)
1209 		unix_notinflight(scm->fp->fp[i]);
1210 }
1211 
1212 static void unix_destruct_fds(struct sk_buff *skb)
1213 {
1214 	struct scm_cookie scm;
1215 	memset(&scm, 0, sizeof(scm));
1216 	unix_detach_fds(&scm, skb);
1217 
1218 	/* Alas, it calls VFS */
1219 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1220 	scm_destroy(&scm);
1221 	sock_wfree(skb);
1222 }
1223 
1224 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1225 {
1226 	int i;
1227 	for (i=scm->fp->count-1; i>=0; i--)
1228 		unix_inflight(scm->fp->fp[i]);
1229 	UNIXCB(skb).fp = scm->fp;
1230 	skb->destructor = unix_destruct_fds;
1231 	scm->fp = NULL;
1232 }
1233 
1234 /*
1235  *	Send AF_UNIX data.
1236  */
1237 
/*
 *	Send one message as a single skb.  Also used by
 *	unix_seqpacket_sendmsg() after it has cleared msg_namelen.
 *
 *	The destination is the address in msg->msg_name when msg_namelen is
 *	non-zero, otherwise the connected peer.  Returns len on success or a
 *	negative errno; MSG_OOB is rejected with -EOPNOTSUPP.
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	/* Use an on-stack cookie when the caller did not supply one. */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	/* From here on every exit path runs scm_destroy() on the cookie. */
	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: a non-negative result is the name length. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No destination given: must be connected; takes a peer reference. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* A SOCK_PASSCRED sender without an address is autobound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	/* The whole message must fit in the send buffer, less 32 bytes. */
	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out;

	/* Stamp sender credentials and hand any passed files to the skb. */
	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp)
		unix_attach_fds(siocb->scm, skb);

	skb->h.raw = skb->data;
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

	/*
	 * Re-entered with other == NULL after a dead receiver was dropped, and
	 * with other still referenced (but unlocked) after waiting for space.
	 */
restart:
	if (!other) {
		/* No receiver and no address to look one up with. */
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other==NULL)
			goto out_free;
	}

	unix_state_rlock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		/* Drop the lock and the reference taken for this send. */
		unix_state_runlock(other);
		sock_put(other);

		err = 0;
		unix_state_wlock(sk);
		if (unix_peer(sk) == other) {
			/* The dead socket was our connected peer: disconnect. */
			unix_peer(sk)=NULL;
			unix_state_wunlock(sk);

			unix_dgram_disconnected(sk, other);
			/* Second put: releases the reference held via unix_peer(sk). */
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_wunlock(sk);
		}

		/* Not our peer: look the address up again. */
		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	/* The security hook is skipped for SOCK_SEQPACKET senders. */
	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/*
	 * Receiver is not connected back to us and its queue is over
	 * sk_max_ack_backlog: fail with -EAGAIN or wait for room.
	 */
	if (unix_peer(other) != sk &&
	    (skb_queue_len(&other->sk_receive_queue) >
	     other->sk_max_ack_backlog)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/*
		 * NOTE(review): neither path after this call unlocks other, so
		 * unix_wait_for_peer() presumably drops the read lock - confirm.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	/* Deliver: queue the skb, notify the receiver, drop our reference. */
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_runlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_runlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1387 
1388 
/*
 *	Send data on a connected stream socket.
 *
 *	The payload is cut into skbs of at most sk_sndbuf/2 - 64 bytes (and at
 *	most SKB_MAX_ALLOC), each queued on the peer's receive queue.  Returns
 *	the number of bytes queued if any were, otherwise a negative errno.
 *	Supplying a destination address is an error, as is MSG_OOB.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr=msg->msg_name;
	int err,size;
	struct sk_buff *skb;
	int sent=0;
	struct scm_cookie tmp_scm;

	/* Use an on-stack cookie when the caller did not supply one. */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* An address is never accepted on a stream send. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		/* Takes a reference on the peer, dropped on every exit path. */
		other = unix_peer_get(sk);
		if (!other)
			goto out_err;
	}

	/* pipe_err sets err to -EPIPE itself. */
	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while(sent < len)
	{
		/*
		 *	Optimisation for the fact that under 0.01% of X messages typically
		 *	need breaking up.
		 */

		size=len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > sk->sk_sndbuf / 2 - 64)
			size = sk->sk_sndbuf / 2 - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);

		if (skb==NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		/* Never copy more than the skb actually has room for. */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/*
		 * unix_attach_fds() clears scm->fp, so the files travel with the
		 * first skb only.
		 */
		if (siocb->scm->fp)
			unix_attach_fds(siocb->scm, skb);

		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_rlock(other);

		/* Peer gone or no longer receiving: drop this skb, report EPIPE. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_runlock(other);
		other->sk_data_ready(other, size);
		sent+=size;
	}
	sock_put(other);

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_runlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing was sent and the caller did not opt out. */
	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE,current,0);
	err = -EPIPE;
out_err:
        if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* A partial send reports the byte count rather than the error. */
	return sent ? : err;
}
1500 
1501 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1502 				  struct msghdr *msg, size_t len)
1503 {
1504 	int err;
1505 	struct sock *sk = sock->sk;
1506 
1507 	err = sock_error(sk);
1508 	if (err)
1509 		return err;
1510 
1511 	if (sk->sk_state != TCP_ESTABLISHED)
1512 		return -ENOTCONN;
1513 
1514 	if (msg->msg_namelen)
1515 		msg->msg_namelen = 0;
1516 
1517 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1518 }
1519 
1520 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1521 {
1522 	struct unix_sock *u = unix_sk(sk);
1523 
1524 	msg->msg_namelen = 0;
1525 	if (u->addr) {
1526 		msg->msg_namelen = u->addr->len;
1527 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1528 	}
1529 }
1530 
1531 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1532 			      struct msghdr *msg, size_t size,
1533 			      int flags)
1534 {
1535 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1536 	struct scm_cookie tmp_scm;
1537 	struct sock *sk = sock->sk;
1538 	struct unix_sock *u = unix_sk(sk);
1539 	int noblock = flags & MSG_DONTWAIT;
1540 	struct sk_buff *skb;
1541 	int err;
1542 
1543 	err = -EOPNOTSUPP;
1544 	if (flags&MSG_OOB)
1545 		goto out;
1546 
1547 	msg->msg_namelen = 0;
1548 
1549 	down(&u->readsem);
1550 
1551 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1552 	if (!skb)
1553 		goto out_unlock;
1554 
1555 	wake_up_interruptible(&u->peer_wait);
1556 
1557 	if (msg->msg_name)
1558 		unix_copy_addr(msg, skb->sk);
1559 
1560 	if (size > skb->len)
1561 		size = skb->len;
1562 	else if (size < skb->len)
1563 		msg->msg_flags |= MSG_TRUNC;
1564 
1565 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1566 	if (err)
1567 		goto out_free;
1568 
1569 	if (!siocb->scm) {
1570 		siocb->scm = &tmp_scm;
1571 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1572 	}
1573 	siocb->scm->creds = *UNIXCREDS(skb);
1574 
1575 	if (!(flags & MSG_PEEK))
1576 	{
1577 		if (UNIXCB(skb).fp)
1578 			unix_detach_fds(siocb->scm, skb);
1579 	}
1580 	else
1581 	{
1582 		/* It is questionable: on PEEK we could:
1583 		   - do not return fds - good, but too simple 8)
1584 		   - return fds, and do not return them on read (old strategy,
1585 		     apparently wrong)
1586 		   - clone fds (I chose it for now, it is the most universal
1587 		     solution)
1588 
1589 	           POSIX 1003.1g does not actually define this clearly
1590 	           at all. POSIX 1003.1g doesn't define a lot of things
1591 	           clearly however!
1592 
1593 		*/
1594 		if (UNIXCB(skb).fp)
1595 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1596 	}
1597 	err = size;
1598 
1599 	scm_recv(sock, msg, siocb->scm, flags);
1600 
1601 out_free:
1602 	skb_free_datagram(sk,skb);
1603 out_unlock:
1604 	up(&u->readsem);
1605 out:
1606 	return err;
1607 }
1608 
1609 /*
 *	Sleep until data has arrived. But check for races..
1611  */
1612 
/*
 * Block on sk->sk_sleep until the receive queue is non-empty, an error or
 * receive shutdown is flagged, a signal is pending, or the timeout runs out.
 * Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock * sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_rlock(sk);

	for (;;) {
		/* Register on the wait queue before testing the wake conditions. */
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* The state lock is released for the duration of the sleep. */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_runlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_rlock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_runlock(sk);
	return timeo;
}
1640 
1641 
1642 
/*
 *	Receive from a connected stream socket.
 *
 *	Dequeues skbs and copies their data into msg->msg_iov until size bytes
 *	were copied, a break condition hits, or the queue is empty with at
 *	least the low-water target satisfied.  Returns the number of bytes
 *	copied if non-zero, otherwise err (0 on receive shutdown).
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* target: bytes needed before an empty queue lets us return. */
	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	/* Fall back to a zeroed on-stack cookie if the caller gave none. */
	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	down(&u->readsem);

	do
	{
		int chunk;
		struct sk_buff *skb;

		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb==NULL)
		{
			if (copied >= target)
				break;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			if ((err = sock_error(sk)) != 0)
				break;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Release readsem while sleeping for more data. */
			up(&u->readsem);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				/* readsem is not held here, so skip the up() below. */
				goto out;
			}
			down(&u->readsem);
			continue;
		}

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr)
		{
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy failed: requeue the skb untouched. */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK))
		{
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len)
			{
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after an skb that carried files. */
			if (siocb->scm->fp)
				break;
		}
		else
		{
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	up(&u->readsem);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1783 
1784 static int unix_shutdown(struct socket *sock, int mode)
1785 {
1786 	struct sock *sk = sock->sk;
1787 	struct sock *other;
1788 
1789 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1790 
1791 	if (mode) {
1792 		unix_state_wlock(sk);
1793 		sk->sk_shutdown |= mode;
1794 		other=unix_peer(sk);
1795 		if (other)
1796 			sock_hold(other);
1797 		unix_state_wunlock(sk);
1798 		sk->sk_state_change(sk);
1799 
1800 		if (other &&
1801 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1802 
1803 			int peer_mode = 0;
1804 
1805 			if (mode&RCV_SHUTDOWN)
1806 				peer_mode |= SEND_SHUTDOWN;
1807 			if (mode&SEND_SHUTDOWN)
1808 				peer_mode |= RCV_SHUTDOWN;
1809 			unix_state_wlock(other);
1810 			other->sk_shutdown |= peer_mode;
1811 			unix_state_wunlock(other);
1812 			other->sk_state_change(other);
1813 			read_lock(&other->sk_callback_lock);
1814 			if (peer_mode == SHUTDOWN_MASK)
1815 				sk_wake_async(other,1,POLL_HUP);
1816 			else if (peer_mode & RCV_SHUTDOWN)
1817 				sk_wake_async(other,1,POLL_IN);
1818 			read_unlock(&other->sk_callback_lock);
1819 		}
1820 		if (other)
1821 			sock_put(other);
1822 	}
1823 	return 0;
1824 }
1825 
1826 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1827 {
1828 	struct sock *sk = sock->sk;
1829 	long amount=0;
1830 	int err;
1831 
1832 	switch(cmd)
1833 	{
1834 		case SIOCOUTQ:
1835 			amount = atomic_read(&sk->sk_wmem_alloc);
1836 			err = put_user(amount, (int __user *)arg);
1837 			break;
1838 		case SIOCINQ:
1839 		{
1840 			struct sk_buff *skb;
1841 
1842 			if (sk->sk_state == TCP_LISTEN) {
1843 				err = -EINVAL;
1844 				break;
1845 			}
1846 
1847 			spin_lock(&sk->sk_receive_queue.lock);
1848 			if (sk->sk_type == SOCK_STREAM ||
1849 			    sk->sk_type == SOCK_SEQPACKET) {
1850 				skb_queue_walk(&sk->sk_receive_queue, skb)
1851 					amount += skb->len;
1852 			} else {
1853 				skb = skb_peek(&sk->sk_receive_queue);
1854 				if (skb)
1855 					amount=skb->len;
1856 			}
1857 			spin_unlock(&sk->sk_receive_queue.lock);
1858 			err = put_user(amount, (int __user *)arg);
1859 			break;
1860 		}
1861 
1862 		default:
1863 			err = dev_ioctl(cmd, (void __user *)arg);
1864 			break;
1865 	}
1866 	return err;
1867 }
1868 
1869 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1870 {
1871 	struct sock *sk = sock->sk;
1872 	unsigned int mask;
1873 
1874 	poll_wait(file, sk->sk_sleep, wait);
1875 	mask = 0;
1876 
1877 	/* exceptional events? */
1878 	if (sk->sk_err)
1879 		mask |= POLLERR;
1880 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1881 		mask |= POLLHUP;
1882 
1883 	/* readable? */
1884 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1885 	    (sk->sk_shutdown & RCV_SHUTDOWN))
1886 		mask |= POLLIN | POLLRDNORM;
1887 
1888 	/* Connection-based need to check for termination and startup */
1889 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1890 		mask |= POLLHUP;
1891 
1892 	/*
1893 	 * we set writable also when the other side has shut down the
1894 	 * connection. This prevents stuck sockets.
1895 	 */
1896 	if (unix_writable(sk))
1897 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1898 
1899 	return mask;
1900 }
1901 
1902 
1903 #ifdef CONFIG_PROC_FS
1904 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1905 {
1906 	loff_t off = 0;
1907 	struct sock *s;
1908 
1909 	for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1910 		if (off == pos)
1911 			return s;
1912 		++off;
1913 	}
1914 	return NULL;
1915 }
1916 
1917 
1918 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1919 {
1920 	read_lock(&unix_table_lock);
1921 	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1922 }
1923 
1924 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1925 {
1926 	++*pos;
1927 
1928 	if (v == (void *)1)
1929 		return first_unix_socket(seq->private);
1930 	return next_unix_socket(seq->private, v);
1931 }
1932 
/* seq_file stop: release the read lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&unix_table_lock);
}
1937 
1938 static int unix_seq_show(struct seq_file *seq, void *v)
1939 {
1940 
1941 	if (v == (void *)1)
1942 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1943 			 "Inode Path\n");
1944 	else {
1945 		struct sock *s = v;
1946 		struct unix_sock *u = unix_sk(s);
1947 		unix_state_rlock(s);
1948 
1949 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1950 			s,
1951 			atomic_read(&s->sk_refcnt),
1952 			0,
1953 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1954 			s->sk_type,
1955 			s->sk_socket ?
1956 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1957 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1958 			sock_i_ino(s));
1959 
1960 		if (u->addr) {
1961 			int i, len;
1962 			seq_putc(seq, ' ');
1963 
1964 			i = 0;
1965 			len = u->addr->len - sizeof(short);
1966 			if (!UNIX_ABSTRACT(s))
1967 				len--;
1968 			else {
1969 				seq_putc(seq, '@');
1970 				i++;
1971 			}
1972 			for ( ; i < len; i++)
1973 				seq_putc(seq, u->addr->name->sun_path[i]);
1974 		}
1975 		unix_state_runlock(s);
1976 		seq_putc(seq, '\n');
1977 	}
1978 
1979 	return 0;
1980 }
1981 
/* Iterator callbacks passed to seq_open() by unix_seq_open(). */
static struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
1988 
1989 
1990 static int unix_seq_open(struct inode *inode, struct file *file)
1991 {
1992 	struct seq_file *seq;
1993 	int rc = -ENOMEM;
1994 	int *iter = kmalloc(sizeof(int), GFP_KERNEL);
1995 
1996 	if (!iter)
1997 		goto out;
1998 
1999 	rc = seq_open(file, &unix_seq_ops);
2000 	if (rc)
2001 		goto out_kfree;
2002 
2003 	seq	     = file->private_data;
2004 	seq->private = iter;
2005 	*iter = 0;
2006 out:
2007 	return rc;
2008 out_kfree:
2009 	kfree(iter);
2010 	goto out;
2011 }
2012 
/*
 * File operations for the "unix" proc entry created in af_unix_init().
 * seq_release_private is used on release; unix_seq_open() stores a
 * kmalloc'ed int in seq->private.
 */
static struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
2020 
2021 #endif
2022 
/* PF_UNIX family descriptor handed to sock_register() in af_unix_init(). */
static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2028 
2029 static int __init af_unix_init(void)
2030 {
2031 	int rc = -1;
2032 	struct sk_buff *dummy_skb;
2033 
2034 	if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2035 		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2036 		goto out;
2037 	}
2038 
2039 	rc = proto_register(&unix_proto, 1);
2040         if (rc != 0) {
2041                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2042 		       __FUNCTION__);
2043 		goto out;
2044 	}
2045 
2046 	sock_register(&unix_family_ops);
2047 #ifdef CONFIG_PROC_FS
2048 	proc_net_fops_create("unix", 0, &unix_seq_fops);
2049 #endif
2050 	unix_sysctl_register();
2051 out:
2052 	return rc;
2053 }
2054 
/*
 *	Module teardown: unregister the PF_UNIX family, the sysctl entries,
 *	the "unix" proc entry and finally the unix proto.
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	unix_sysctl_unregister();
	proc_net_remove("unix");
	proto_unregister(&unix_proto);
}
2062 
/* Module entry and exit points. */
module_init(af_unix_init);
module_exit(af_unix_exit);

/* Module metadata: licence and the PF_UNIX protocol-family alias. */
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2068