xref: /openbmc/linux/net/unix/af_unix.c (revision 9ac8d3fb)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid a huge number
38  *					of socks hashed (this is for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
/*
 * Global table of AF_UNIX sockets.  It has UNIX_HASH_SIZE + 1 buckets: the
 * extra bucket at index UNIX_HASH_SIZE holds sockets that have no address
 * yet (see unix_sockets_unbound below); the other buckets are selected by
 * a name hash or by an inode number.
 */
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table (lookups, insertions and removals). */
119 static DEFINE_SPINLOCK(unix_table_lock);
/*
 * Count of AF_UNIX socks: incremented in unix_create1() and decremented
 * in unix_sock_destructor().
 */
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121 
/* Bucket that collects sockets which are not bound to any name. */
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
/*
 * True when the address hash of a socket is not the UNIX_HASH_SIZE marker.
 * unix_bind() stores that marker for filesystem (pathname) bindings.
 * Dereferences ->addr, so the socket must already have an address.
 */
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
/*
 * Security-ID plumbing between an scm cookie and an skb.  Real
 * implementations exist only with CONFIG_SECURITY_NETWORK; otherwise
 * both helpers are empty.
 */
126 #ifdef CONFIG_SECURITY_NETWORK
/* Copy the security ID held in @scm into the slot UNIXSID() gives for @skb. */
127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
128 {
129 	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
130 }
131 
/* Copy the security ID stored with @skb back into @scm. */
132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
133 {
134 	scm->secid = *UNIXSID(skb);
135 }
136 #else
/* No-op variant used when CONFIG_SECURITY_NETWORK is not set. */
137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
138 { }
139 
/* No-op variant used when CONFIG_SECURITY_NETWORK is not set. */
140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 { }
142 #endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate rwlock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
/*
 * The peer pointer of an AF_UNIX socket (NULL when there is none).
 * Expands to the struct member itself, so it is also used as an
 * assignment target in this file.
 */
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
/* Return non-zero when the peer recorded on @osk is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_osk = unix_peer(osk);

	return peer_of_osk == sk;
}
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
200 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len]=0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
220 	return len;
221 }
222 
/*
 * Take @sk off whatever hash chain it is on.  The callers in this file
 * hold unix_table_lock around this.
 */
static void __unix_remove_socket(struct sock *sk)
{
	(void)sk_del_node_init(sk);
}
227 
/*
 * Put @sk on the chain @list.  Warns if @sk is still hashed somewhere.
 * The callers in this file hold unix_table_lock around this.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if(dentry && dentry->d_inode == i)
299 		{
300 			sock_hold(s);
301 			goto found;
302 		}
303 	}
304 	s = NULL;
305 found:
306 	spin_unlock(&unix_table_lock);
307 	return s;
308 }
309 
310 static inline int unix_writable(struct sock *sk)
311 {
312 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
313 }
314 
315 static void unix_write_space(struct sock *sk)
316 {
317 	read_lock(&sk->sk_callback_lock);
318 	if (unix_writable(sk)) {
319 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
320 			wake_up_interruptible_sync(sk->sk_sleep);
321 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
322 	}
323 	read_unlock(&sk->sk_callback_lock);
324 }
325 
326 /* When dgram socket disconnects (or changes its peer), we clear its receive
327  * queue of packets arrived from previous peer. First, it allows to do
328  * flow control based only on wmem_alloc; second, sk connected to peer
329  * may receive messages only from that peer. */
330 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
331 {
332 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
333 		skb_queue_purge(&sk->sk_receive_queue);
334 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
335 
336 		/* If one link of bidirectional dgram pipe is disconnected,
337 		 * we signal error. Messages are lost. Do not make this,
338 		 * when peer was not connected to us.
339 		 */
340 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
341 			other->sk_err = ECONNRESET;
342 			other->sk_error_report(other);
343 		}
344 	}
345 }
346 
347 static void unix_sock_destructor(struct sock *sk)
348 {
349 	struct unix_sock *u = unix_sk(sk);
350 
351 	skb_queue_purge(&sk->sk_receive_queue);
352 
353 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
354 	WARN_ON(!sk_unhashed(sk));
355 	WARN_ON(sk->sk_socket);
356 	if (!sock_flag(sk, SOCK_DEAD)) {
357 		printk("Attempt to release alive unix socket: %p\n", sk);
358 		return;
359 	}
360 
361 	if (u->addr)
362 		unix_release_addr(u->addr);
363 
364 	atomic_dec(&unix_nr_socks);
365 #ifdef UNIX_REFCNT_DEBUG
366 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
367 #endif
368 }
369 
/*
 * Tear down an AF_UNIX sock.
 *
 * @sk:      sock being released.
 * @embrion: non-zero when @sk is a connection that was still sitting on
 *           a listening socket's receive queue (see the recursive call
 *           below); in that case the peer always gets ECONNRESET.
 *
 * The sock is unhashed, orphaned and moved to TCP_CLOSE, waiters on
 * peer_wait are woken, a stream/seqpacket peer is shut down and
 * notified, the receive queue is drained, the bound dentry/mount are
 * released and finally the reference on @sk is dropped.
 *
 * Always returns 0.
 */
370 static int unix_release_sock (struct sock *sk, int embrion)
371 {
372 	struct unix_sock *u = unix_sk(sk);
373 	struct dentry *dentry;
374 	struct vfsmount *mnt;
375 	struct sock *skpair;
376 	struct sk_buff *skb;
377 	int state;
378 
379 	unix_remove_socket(sk);
380 
381 	/* Clear state */
382 	unix_state_lock(sk);
383 	sock_orphan(sk);
384 	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Detach dentry/mnt under the lock; they are put further down. */
385 	dentry	     = u->dentry;
386 	u->dentry    = NULL;
387 	mnt	     = u->mnt;
388 	u->mnt	     = NULL;
	/* Remember the old state: a listener needs its queue handled specially. */
389 	state = sk->sk_state;
390 	sk->sk_state = TCP_CLOSE;
391 	unix_state_unlock(sk);
392 
393 	wake_up_interruptible_all(&u->peer_wait);
394 
395 	skpair=unix_peer(sk);
396 
397 	if (skpair!=NULL) {
398 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
399 			unix_state_lock(skpair);
400 			/* No more writes */
401 			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or a never-accepted connection) means a reset. */
402 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
403 				skpair->sk_err = ECONNRESET;
404 			unix_state_unlock(skpair);
405 			skpair->sk_state_change(skpair);
406 			read_lock(&skpair->sk_callback_lock);
407 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
408 			read_unlock(&skpair->sk_callback_lock);
409 		}
410 		sock_put(skpair); /* It may now die */
411 		unix_peer(sk) = NULL;
412 	}
413 
414 	/* Try to flush out this socket. Throw out buffers at least */
415 
416 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* On a listener each queued skb stands for a pending connection. */
417 		if (state==TCP_LISTEN)
418 			unix_release_sock(skb->sk, 1);
419 		/* passed fds are erased in the kfree_skb hook	      */
420 		kfree_skb(skb);
421 	}
422 
423 	if (dentry) {
424 		dput(dentry);
425 		mntput(mnt);
426 	}
427 
428 	sock_put(sk);
429 
430 	/* ---- Socket is dead now and most probably destroyed ---- */
431 
432 	/*
433 	 * Fixme: BSD difference: In BSD all sockets connected to use get
434 	 *	  ECONNRESET and we die on the spot. In Linux we behave
435 	 *	  like files and pipes do and wait for the last
436 	 *	  dereference.
437 	 *
438 	 * Can't we simply set sock->err?
439 	 *
440 	 *	  What the above comment does talk about? --ANK(980817)
441 	 */
442 
443 	if (unix_tot_inflight)
444 		unix_gc();		/* Garbage collect fds */
445 
446 	return 0;
447 }
448 
449 static int unix_listen(struct socket *sock, int backlog)
450 {
451 	int err;
452 	struct sock *sk = sock->sk;
453 	struct unix_sock *u = unix_sk(sk);
454 
455 	err = -EOPNOTSUPP;
456 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
457 		goto out;			/* Only stream/seqpacket sockets accept */
458 	err = -EINVAL;
459 	if (!u->addr)
460 		goto out;			/* No listens on an unbound socket */
461 	unix_state_lock(sk);
462 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
463 		goto out_unlock;
464 	if (backlog > sk->sk_max_ack_backlog)
465 		wake_up_interruptible_all(&u->peer_wait);
466 	sk->sk_max_ack_backlog	= backlog;
467 	sk->sk_state		= TCP_LISTEN;
468 	/* set credentials so connect can copy them */
469 	sk->sk_peercred.pid	= task_tgid_vnr(current);
470 	sk->sk_peercred.uid	= current->euid;
471 	sk->sk_peercred.gid	= current->egid;
472 	err = 0;
473 
474 out_unlock:
475 	unix_state_unlock(sk);
476 out:
477 	return err;
478 }
479 
/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables that follow.
 */
480 static int unix_release(struct socket *);
481 static int unix_bind(struct socket *, struct sockaddr *, int);
482 static int unix_stream_connect(struct socket *, struct sockaddr *,
483 			       int addr_len, int flags);
484 static int unix_socketpair(struct socket *, struct socket *);
485 static int unix_accept(struct socket *, struct socket *, int);
486 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
487 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
488 static unsigned int unix_dgram_poll(struct file *, struct socket *,
489 				    poll_table *);
490 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
491 static int unix_shutdown(struct socket *, int);
492 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
493 			       struct msghdr *, size_t);
494 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
495 			       struct msghdr *, size_t, int);
496 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
497 			      struct msghdr *, size_t);
498 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
499 			      struct msghdr *, size_t, int);
500 static int unix_dgram_connect(struct socket *, struct sockaddr *,
501 			      int, int);
502 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
503 				  struct msghdr *, size_t);
504 
/*
 * Operations for SOCK_STREAM sockets (selected in unix_create()):
 * stream connect/accept/listen and the stream send/receive routines.
 * setsockopt, getsockopt, mmap and sendpage use the sock_no_* stubs.
 */
505 static const struct proto_ops unix_stream_ops = {
506 	.family =	PF_UNIX,
507 	.owner =	THIS_MODULE,
508 	.release =	unix_release,
509 	.bind =		unix_bind,
510 	.connect =	unix_stream_connect,
511 	.socketpair =	unix_socketpair,
512 	.accept =	unix_accept,
513 	.getname =	unix_getname,
514 	.poll =		unix_poll,
515 	.ioctl =	unix_ioctl,
516 	.listen =	unix_listen,
517 	.shutdown =	unix_shutdown,
518 	.setsockopt =	sock_no_setsockopt,
519 	.getsockopt =	sock_no_getsockopt,
520 	.sendmsg =	unix_stream_sendmsg,
521 	.recvmsg =	unix_stream_recvmsg,
522 	.mmap =		sock_no_mmap,
523 	.sendpage =	sock_no_sendpage,
524 };
525 
/*
 * Operations for SOCK_DGRAM sockets (unix_create() also maps SOCK_RAW
 * here): datagram connect, poll and send/receive routines.  accept and
 * listen use the sock_no_* stubs, as do setsockopt, getsockopt, mmap
 * and sendpage.
 */
526 static const struct proto_ops unix_dgram_ops = {
527 	.family =	PF_UNIX,
528 	.owner =	THIS_MODULE,
529 	.release =	unix_release,
530 	.bind =		unix_bind,
531 	.connect =	unix_dgram_connect,
532 	.socketpair =	unix_socketpair,
533 	.accept =	sock_no_accept,
534 	.getname =	unix_getname,
535 	.poll =		unix_dgram_poll,
536 	.ioctl =	unix_ioctl,
537 	.listen =	sock_no_listen,
538 	.shutdown =	unix_shutdown,
539 	.setsockopt =	sock_no_setsockopt,
540 	.getsockopt =	sock_no_getsockopt,
541 	.sendmsg =	unix_dgram_sendmsg,
542 	.recvmsg =	unix_dgram_recvmsg,
543 	.mmap =		sock_no_mmap,
544 	.sendpage =	sock_no_sendpage,
545 };
546 
/*
 * Operations for SOCK_SEQPACKET sockets (selected in unix_create()).
 * Connection handling (connect/accept/listen) is shared with the stream
 * table, while poll and recvmsg are the datagram routines and sendmsg
 * has its own seqpacket entry point.
 */
547 static const struct proto_ops unix_seqpacket_ops = {
548 	.family =	PF_UNIX,
549 	.owner =	THIS_MODULE,
550 	.release =	unix_release,
551 	.bind =		unix_bind,
552 	.connect =	unix_stream_connect,
553 	.socketpair =	unix_socketpair,
554 	.accept =	unix_accept,
555 	.getname =	unix_getname,
556 	.poll =		unix_dgram_poll,
557 	.ioctl =	unix_ioctl,
558 	.listen =	unix_listen,
559 	.shutdown =	unix_shutdown,
560 	.setsockopt =	sock_no_setsockopt,
561 	.getsockopt =	sock_no_getsockopt,
562 	.sendmsg =	unix_seqpacket_sendmsg,
563 	.recvmsg =	unix_dgram_recvmsg,
564 	.mmap =		sock_no_mmap,
565 	.sendpage =	sock_no_sendpage,
566 };
567 
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size
 * is the size of struct unix_sock.
 */
568 static struct proto unix_proto = {
569 	.name	  = "UNIX",
570 	.owner	  = THIS_MODULE,
571 	.obj_size = sizeof(struct unix_sock),
572 };
573 
574 /*
575  * AF_UNIX sockets do not interact with hardware, hence they
576  * don't trigger interrupts - so it's safe for them to have
577  * bh-unsafe locking for their sk_receive_queue.lock. Split off
578  * this special lock-class by reinitializing the spinlock key
579  * (done with lockdep_set_class() in unix_create1()):
580  */
580 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
581 
582 static struct sock * unix_create1(struct net *net, struct socket *sock)
583 {
584 	struct sock *sk = NULL;
585 	struct unix_sock *u;
586 
587 	atomic_inc(&unix_nr_socks);
588 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
589 		goto out;
590 
591 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
592 	if (!sk)
593 		goto out;
594 
595 	sock_init_data(sock,sk);
596 	lockdep_set_class(&sk->sk_receive_queue.lock,
597 				&af_unix_sk_receive_queue_lock_key);
598 
599 	sk->sk_write_space	= unix_write_space;
600 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
601 	sk->sk_destruct		= unix_sock_destructor;
602 	u	  = unix_sk(sk);
603 	u->dentry = NULL;
604 	u->mnt	  = NULL;
605 	spin_lock_init(&u->lock);
606 	atomic_long_set(&u->inflight, 0);
607 	INIT_LIST_HEAD(&u->link);
608 	mutex_init(&u->readlock); /* single task reading lock */
609 	init_waitqueue_head(&u->peer_wait);
610 	unix_insert_socket(unix_sockets_unbound, sk);
611 out:
612 	if (sk == NULL)
613 		atomic_dec(&unix_nr_socks);
614 	return sk;
615 }
616 
617 static int unix_create(struct net *net, struct socket *sock, int protocol)
618 {
619 	if (protocol && protocol != PF_UNIX)
620 		return -EPROTONOSUPPORT;
621 
622 	sock->state = SS_UNCONNECTED;
623 
624 	switch (sock->type) {
625 	case SOCK_STREAM:
626 		sock->ops = &unix_stream_ops;
627 		break;
628 		/*
629 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
630 		 *	nothing uses it.
631 		 */
632 	case SOCK_RAW:
633 		sock->type=SOCK_DGRAM;
634 	case SOCK_DGRAM:
635 		sock->ops = &unix_dgram_ops;
636 		break;
637 	case SOCK_SEQPACKET:
638 		sock->ops = &unix_seqpacket_ops;
639 		break;
640 	default:
641 		return -ESOCKTNOSUPPORT;
642 	}
643 
644 	return unix_create1(net, sock) ? 0 : -ENOMEM;
645 }
646 
647 static int unix_release(struct socket *sock)
648 {
649 	struct sock *sk = sock->sk;
650 
651 	if (!sk)
652 		return 0;
653 
654 	sock->sk = NULL;
655 
656 	return unix_release_sock (sk, 0);
657 }
658 
659 static int unix_autobind(struct socket *sock)
660 {
661 	struct sock *sk = sock->sk;
662 	struct net *net = sock_net(sk);
663 	struct unix_sock *u = unix_sk(sk);
664 	static u32 ordernum = 1;
665 	struct unix_address * addr;
666 	int err;
667 
668 	mutex_lock(&u->readlock);
669 
670 	err = 0;
671 	if (u->addr)
672 		goto out;
673 
674 	err = -ENOMEM;
675 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
676 	if (!addr)
677 		goto out;
678 
679 	addr->name->sun_family = AF_UNIX;
680 	atomic_set(&addr->refcnt, 1);
681 
682 retry:
683 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
684 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
685 
686 	spin_lock(&unix_table_lock);
687 	ordernum = (ordernum+1)&0xFFFFF;
688 
689 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
690 				      addr->hash)) {
691 		spin_unlock(&unix_table_lock);
692 		/* Sanity yield. It is unusual case, but yet... */
693 		if (!(ordernum&0xFF))
694 			yield();
695 		goto retry;
696 	}
697 	addr->hash ^= sk->sk_type;
698 
699 	__unix_remove_socket(sk);
700 	u->addr = addr;
701 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
702 	spin_unlock(&unix_table_lock);
703 	err = 0;
704 
705 out:	mutex_unlock(&u->readlock);
706 	return err;
707 }
708 
/*
 * Look up the socket that @sunname refers to.
 *
 * A name whose first path byte is non-zero is resolved through the
 * filesystem: the caller needs write permission on the inode, the inode
 * must be a socket, and the bound socket must be of type @type.  Any
 * other name is looked up as an abstract name using @len and @hash.
 *
 * Returns the sock with a reference held.  On failure returns NULL and
 * stores a negative errno in *@error (the path lookup or permission
 * error, -ECONNREFUSED when nothing is bound there, or -EPROTOTYPE on a
 * type mismatch).
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *found;
	struct path path;
	struct inode *inode;
	int err;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(net, inode);
	if (!found)
		goto put_fail;

	if (found->sk_type != type) {
		/* Wrong kind of socket: drop the path, then the sock. */
		path_put(&path);
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}

	touch_atime(path.mnt, path.dentry);
	path_put(&path);
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
763 
764 
/*
 * bind() for AF_UNIX sockets.
 *
 * An address consisting of the family only (addr_len == sizeof(short))
 * requests an autobind.  A name whose first path byte is non-zero is a
 * filesystem name: a socket inode is created at that path and the
 * socket is hashed by inode number.  Any other name is abstract and is
 * hashed by name, after checking that it is not already in use.
 *
 * Runs under u->readlock.  Returns 0 on success or a negative errno:
 * -EINVAL for a wrong family, an invalid name or an already bound
 * socket, -ENOMEM if the address cannot be allocated, -EADDRINUSE if the
 * name is taken (an -EEXIST from the filesystem steps is mapped to
 * this), or the error returned by those steps.
 */
765 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
766 {
767 	struct sock *sk = sock->sk;
768 	struct net *net = sock_net(sk);
769 	struct unix_sock *u = unix_sk(sk);
770 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
771 	struct dentry * dentry = NULL;
772 	struct nameidata nd;
773 	int err;
774 	unsigned hash;
775 	struct unix_address *addr;
776 	struct hlist_head *list;
777 
778 	err = -EINVAL;
779 	if (sunaddr->sun_family != AF_UNIX)
780 		goto out;
781 
	/* Family only, no name: let the kernel pick an abstract name. */
782 	if (addr_len==sizeof(short)) {
783 		err = unix_autobind(sock);
784 		goto out;
785 	}
786 
	/* Validate the name; on success err is the length to use. */
787 	err = unix_mkname(sunaddr, addr_len, &hash);
788 	if (err < 0)
789 		goto out;
790 	addr_len = err;
791 
792 	mutex_lock(&u->readlock);
793 
	/* A socket can be bound only once. */
794 	err = -EINVAL;
795 	if (u->addr)
796 		goto out_up;
797 
798 	err = -ENOMEM;
799 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
800 	if (!addr)
801 		goto out_up;
802 
803 	memcpy(addr->name, sunaddr, addr_len);
804 	addr->len = addr_len;
	/* Replaced by UNIX_HASH_SIZE below for filesystem names. */
805 	addr->hash = hash ^ sk->sk_type;
806 	atomic_set(&addr->refcnt, 1);
807 
808 	if (sunaddr->sun_path[0]) {
809 		unsigned int mode;
810 		err = 0;
811 		/*
812 		 * Get the parent directory, calculate the hash for last
813 		 * component.
814 		 */
815 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
816 		if (err)
817 			goto out_mknod_parent;
818 
819 		dentry = lookup_create(&nd, 0);
820 		err = PTR_ERR(dentry);
821 		if (IS_ERR(dentry))
822 			goto out_mknod_unlock;
823 
824 		/*
825 		 * All right, let's create it.
826 		 */
827 		mode = S_IFSOCK |
828 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
829 		err = mnt_want_write(nd.path.mnt);
830 		if (err)
831 			goto out_mknod_dput;
832 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
833 		mnt_drop_write(nd.path.mnt);
834 		if (err)
835 			goto out_mknod_dput;
		/* Created: drop the parent and keep the new dentry in nd.path. */
836 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
837 		dput(nd.path.dentry);
838 		nd.path.dentry = dentry;
839 
		/* Marker value that tells filesystem names from abstract ones. */
840 		addr->hash = UNIX_HASH_SIZE;
841 	}
842 
843 	spin_lock(&unix_table_lock);
844 
845 	if (!sunaddr->sun_path[0]) {
		/* Abstract name: refuse it if some socket already owns it. */
846 		err = -EADDRINUSE;
847 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
848 					      sk->sk_type, hash)) {
849 			unix_release_addr(addr);
850 			goto out_unlock;
851 		}
852 
853 		list = &unix_socket_table[addr->hash];
854 	} else {
		/* Filesystem name: bucket by inode number, remember dentry/mnt. */
855 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
856 		u->dentry = nd.path.dentry;
857 		u->mnt    = nd.path.mnt;
858 	}
859 
	/* Move the socket from its current chain to the chosen bucket. */
860 	err = 0;
861 	__unix_remove_socket(sk);
862 	u->addr = addr;
863 	__unix_insert_socket(list, sk);
864 
865 out_unlock:
866 	spin_unlock(&unix_table_lock);
867 out_up:
868 	mutex_unlock(&u->readlock);
869 out:
870 	return err;
871 
	/* Error unwinding for the filesystem branch. */
872 out_mknod_dput:
873 	dput(dentry);
874 out_mknod_unlock:
875 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
876 	path_put(&nd.path);
877 out_mknod_parent:
878 	if (err==-EEXIST)
879 		err=-EADDRINUSE;
880 	unix_release_addr(addr);
881 	goto out_up;
882 }
883 
/*
 * Take the state locks of two socks.  When @sk2 is NULL or the same
 * sock as @sk1 only @sk1 is locked; otherwise the sock at the lower
 * address is locked first and the other one with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
898 
/*
 * Release what unix_state_double_lock() took: always @sk1, and @sk2 as
 * well when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
908 
/*
 * connect() for datagram sockets: set, replace or clear the peer.
 *
 * With family AF_UNSPEC the current peer is dropped.  Otherwise the
 * name is validated, the socket is autobound first if SOCK_PASSCRED is
 * set and it has no address, and the target is looked up; the lookup is
 * retried if the target turns out to be dead.  The connect is refused
 * with -EPERM when the target already has a different peer, and the
 * security hook may veto it.
 *
 * When an old peer is replaced by a different one (or cleared),
 * unix_dgram_disconnected() is called for it, and the reference held on
 * the old peer is dropped.
 *
 * Returns 0 on success or a negative errno.
 */
909 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
910 			      int alen, int flags)
911 {
912 	struct sock *sk = sock->sk;
913 	struct net *net = sock_net(sk);
914 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
915 	struct sock *other;
916 	unsigned hash;
917 	int err;
918 
919 	if (addr->sa_family != AF_UNSPEC) {
920 		err = unix_mkname(sunaddr, alen, &hash);
921 		if (err < 0)
922 			goto out;
923 		alen = err;
924 
925 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
926 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
927 			goto out;
928 
929 restart:
		/* On success this holds a reference on other. */
930 		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
931 		if (!other)
932 			goto out;
933 
934 		unix_state_double_lock(sk, other);
935 
936 		/* Apparently VFS overslept socket death. Retry. */
937 		if (sock_flag(other, SOCK_DEAD)) {
938 			unix_state_double_unlock(sk, other);
939 			sock_put(other);
940 			goto restart;
941 		}
942 
		/* Refuse if other is already connected to somebody else. */
943 		err = -EPERM;
944 		if (!unix_may_send(sk, other))
945 			goto out_unlock;
946 
947 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
948 		if (err)
949 			goto out_unlock;
950 
951 	} else {
952 		/*
953 		 *	1003.1g breaking connected state with AF_UNSPEC
954 		 */
955 		other = NULL;
		/* With a NULL second sock only sk is locked. */
956 		unix_state_double_lock(sk, other);
957 	}
958 
959 	/*
960 	 * If it was connected, reconnect.
961 	 */
962 	if (unix_peer(sk)) {
963 		struct sock *old_peer = unix_peer(sk);
964 		unix_peer(sk)=other;
965 		unix_state_double_unlock(sk, other);
966 
		/* Only a real change of peer purges the receive queue. */
967 		if (other != old_peer)
968 			unix_dgram_disconnected(sk, old_peer);
969 		sock_put(old_peer);
970 	} else {
971 		unix_peer(sk)=other;
972 		unix_state_double_unlock(sk, other);
973 	}
974 	return 0;
975 
976 out_unlock:
977 	unix_state_double_unlock(sk, other);
978 	sock_put(other);
979 out:
980 	return err;
981 }
982 
983 static long unix_wait_for_peer(struct sock *other, long timeo)
984 {
985 	struct unix_sock *u = unix_sk(other);
986 	int sched;
987 	DEFINE_WAIT(wait);
988 
989 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
990 
991 	sched = !sock_flag(other, SOCK_DEAD) &&
992 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
993 		unix_recvq_full(other);
994 
995 	unix_state_unlock(other);
996 
997 	if (sched)
998 		timeo = schedule_timeout(timeo);
999 
1000 	finish_wait(&u->peer_wait, &wait);
1001 	return timeo;
1002 }
1003 
1004 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1005 			       int addr_len, int flags)
1006 {
1007 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1008 	struct sock *sk = sock->sk;
1009 	struct net *net = sock_net(sk);
1010 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1011 	struct sock *newsk = NULL;
1012 	struct sock *other = NULL;
1013 	struct sk_buff *skb = NULL;
1014 	unsigned hash;
1015 	int st;
1016 	int err;
1017 	long timeo;
1018 
1019 	err = unix_mkname(sunaddr, addr_len, &hash);
1020 	if (err < 0)
1021 		goto out;
1022 	addr_len = err;
1023 
1024 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1025 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1026 		goto out;
1027 
1028 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1029 
1030 	/* First of all allocate resources.
1031 	   If we will make it after state is locked,
1032 	   we will have to recheck all again in any case.
1033 	 */
1034 
1035 	err = -ENOMEM;
1036 
1037 	/* create new sock for complete connection */
1038 	newsk = unix_create1(sock_net(sk), NULL);
1039 	if (newsk == NULL)
1040 		goto out;
1041 
1042 	/* Allocate skb for sending to listening sock */
1043 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1044 	if (skb == NULL)
1045 		goto out;
1046 
1047 restart:
1048 	/*  Find listening sock. */
1049 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1050 	if (!other)
1051 		goto out;
1052 
1053 	/* Latch state of peer */
1054 	unix_state_lock(other);
1055 
1056 	/* Apparently VFS overslept socket death. Retry. */
1057 	if (sock_flag(other, SOCK_DEAD)) {
1058 		unix_state_unlock(other);
1059 		sock_put(other);
1060 		goto restart;
1061 	}
1062 
1063 	err = -ECONNREFUSED;
1064 	if (other->sk_state != TCP_LISTEN)
1065 		goto out_unlock;
1066 
1067 	if (unix_recvq_full(other)) {
1068 		err = -EAGAIN;
1069 		if (!timeo)
1070 			goto out_unlock;
1071 
1072 		timeo = unix_wait_for_peer(other, timeo);
1073 
1074 		err = sock_intr_errno(timeo);
1075 		if (signal_pending(current))
1076 			goto out;
1077 		sock_put(other);
1078 		goto restart;
1079 	}
1080 
1081 	/* Latch our state.
1082 
1083 	   It is tricky place. We need to grab write lock and cannot
1084 	   drop lock on peer. It is dangerous because deadlock is
1085 	   possible. Connect to self case and simultaneous
1086 	   attempt to connect are eliminated by checking socket
1087 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1088 	   check this before attempt to grab lock.
1089 
1090 	   Well, and we have to recheck the state after socket locked.
1091 	 */
1092 	st = sk->sk_state;
1093 
1094 	switch (st) {
1095 	case TCP_CLOSE:
1096 		/* This is ok... continue with connect */
1097 		break;
1098 	case TCP_ESTABLISHED:
1099 		/* Socket is already connected */
1100 		err = -EISCONN;
1101 		goto out_unlock;
1102 	default:
1103 		err = -EINVAL;
1104 		goto out_unlock;
1105 	}
1106 
1107 	unix_state_lock_nested(sk);
1108 
1109 	if (sk->sk_state != st) {
1110 		unix_state_unlock(sk);
1111 		unix_state_unlock(other);
1112 		sock_put(other);
1113 		goto restart;
1114 	}
1115 
1116 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1117 	if (err) {
1118 		unix_state_unlock(sk);
1119 		goto out_unlock;
1120 	}
1121 
1122 	/* The way is open! Fastly set all the necessary fields... */
1123 
1124 	sock_hold(sk);
1125 	unix_peer(newsk)	= sk;
1126 	newsk->sk_state		= TCP_ESTABLISHED;
1127 	newsk->sk_type		= sk->sk_type;
1128 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1129 	newsk->sk_peercred.uid	= current->euid;
1130 	newsk->sk_peercred.gid	= current->egid;
1131 	newu = unix_sk(newsk);
1132 	newsk->sk_sleep		= &newu->peer_wait;
1133 	otheru = unix_sk(other);
1134 
1135 	/* copy address information from listening to new sock*/
1136 	if (otheru->addr) {
1137 		atomic_inc(&otheru->addr->refcnt);
1138 		newu->addr = otheru->addr;
1139 	}
1140 	if (otheru->dentry) {
1141 		newu->dentry	= dget(otheru->dentry);
1142 		newu->mnt	= mntget(otheru->mnt);
1143 	}
1144 
1145 	/* Set credentials */
1146 	sk->sk_peercred = other->sk_peercred;
1147 
1148 	sock->state	= SS_CONNECTED;
1149 	sk->sk_state	= TCP_ESTABLISHED;
1150 	sock_hold(newsk);
1151 
1152 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1153 	unix_peer(sk)	= newsk;
1154 
1155 	unix_state_unlock(sk);
1156 
1157 	/* take ten and and send info to listening sock */
1158 	spin_lock(&other->sk_receive_queue.lock);
1159 	__skb_queue_tail(&other->sk_receive_queue, skb);
1160 	spin_unlock(&other->sk_receive_queue.lock);
1161 	unix_state_unlock(other);
1162 	other->sk_data_ready(other, 0);
1163 	sock_put(other);
1164 	return 0;
1165 
1166 out_unlock:
1167 	if (other)
1168 		unix_state_unlock(other);
1169 
1170 out:
1171 	if (skb)
1172 		kfree_skb(skb);
1173 	if (newsk)
1174 		unix_release_sock(newsk, 0);
1175 	if (other)
1176 		sock_put(other);
1177 	return err;
1178 }
1179 
1180 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1181 {
1182 	struct sock *ska=socka->sk, *skb = sockb->sk;
1183 
1184 	/* Join our sockets back to back */
1185 	sock_hold(ska);
1186 	sock_hold(skb);
1187 	unix_peer(ska)=skb;
1188 	unix_peer(skb)=ska;
1189 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1190 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1191 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1192 
1193 	if (ska->sk_type != SOCK_DGRAM) {
1194 		ska->sk_state = TCP_ESTABLISHED;
1195 		skb->sk_state = TCP_ESTABLISHED;
1196 		socka->state  = SS_CONNECTED;
1197 		sockb->state  = SS_CONNECTED;
1198 	}
1199 	return 0;
1200 }
1201 
1202 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1203 {
1204 	struct sock *sk = sock->sk;
1205 	struct sock *tsk;
1206 	struct sk_buff *skb;
1207 	int err;
1208 
1209 	err = -EOPNOTSUPP;
1210 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1211 		goto out;
1212 
1213 	err = -EINVAL;
1214 	if (sk->sk_state != TCP_LISTEN)
1215 		goto out;
1216 
1217 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1218 	 * so that no locks are necessary.
1219 	 */
1220 
1221 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1222 	if (!skb) {
1223 		/* This means receive shutdown. */
1224 		if (err == 0)
1225 			err = -EINVAL;
1226 		goto out;
1227 	}
1228 
1229 	tsk = skb->sk;
1230 	skb_free_datagram(sk, skb);
1231 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1232 
1233 	/* attach accepted sock to socket */
1234 	unix_state_lock(tsk);
1235 	newsock->state = SS_CONNECTED;
1236 	sock_graft(tsk, newsock);
1237 	unix_state_unlock(tsk);
1238 	return 0;
1239 
1240 out:
1241 	return err;
1242 }
1243 
1244 
1245 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1246 {
1247 	struct sock *sk = sock->sk;
1248 	struct unix_sock *u;
1249 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1250 	int err = 0;
1251 
1252 	if (peer) {
1253 		sk = unix_peer_get(sk);
1254 
1255 		err = -ENOTCONN;
1256 		if (!sk)
1257 			goto out;
1258 		err = 0;
1259 	} else {
1260 		sock_hold(sk);
1261 	}
1262 
1263 	u = unix_sk(sk);
1264 	unix_state_lock(sk);
1265 	if (!u->addr) {
1266 		sunaddr->sun_family = AF_UNIX;
1267 		sunaddr->sun_path[0] = 0;
1268 		*uaddr_len = sizeof(short);
1269 	} else {
1270 		struct unix_address *addr = u->addr;
1271 
1272 		*uaddr_len = addr->len;
1273 		memcpy(sunaddr, addr->name, *uaddr_len);
1274 	}
1275 	unix_state_unlock(sk);
1276 	sock_put(sk);
1277 out:
1278 	return err;
1279 }
1280 
1281 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1282 {
1283 	int i;
1284 
1285 	scm->fp = UNIXCB(skb).fp;
1286 	skb->destructor = sock_wfree;
1287 	UNIXCB(skb).fp = NULL;
1288 
1289 	for (i=scm->fp->count-1; i>=0; i--)
1290 		unix_notinflight(scm->fp->fp[i]);
1291 }
1292 
1293 static void unix_destruct_fds(struct sk_buff *skb)
1294 {
1295 	struct scm_cookie scm;
1296 	memset(&scm, 0, sizeof(scm));
1297 	unix_detach_fds(&scm, skb);
1298 
1299 	/* Alas, it calls VFS */
1300 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1301 	scm_destroy(&scm);
1302 	sock_wfree(skb);
1303 }
1304 
1305 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1306 {
1307 	int i;
1308 	for (i=scm->fp->count-1; i>=0; i--)
1309 		unix_inflight(scm->fp->fp[i]);
1310 	UNIXCB(skb).fp = scm->fp;
1311 	skb->destructor = unix_destruct_fds;
1312 	scm->fp = NULL;
1313 }
1314 
/*
 *	Send AF_UNIX data.
 *
 *	Datagram send path (also reached from unix_seqpacket_sendmsg()).
 *	The whole message is copied into a single skb which is appended to
 *	the receive queue of the destination socket.  The destination is
 *	either the address in msg->msg_name or, when no name is given, the
 *	connected peer.
 *
 *	Returns @len on success or a negative errno.  The scm cookie is
 *	destroyed on every path that follows a successful scm_send().
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	/* Use an on-stack cookie when the caller did not supply one */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: validate it; lookup happens at restart */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No destination given: must be connected; take a peer ref */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* SOCK_PASSCRED senders without an address are autobound first */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out;

	/* Stamp credentials, passed files and security data on the skb */
	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp)
		unix_attach_fds(siocb->scm, skb);
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		/*
		 * No receiver held.  Without an explicit address this can
		 * only mean the connected peer went away on a previous pass.
		 */
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other==NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		/* Drop the lock and the reference taken for this attempt */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			/*
			 * The dead socket was our connected peer: disconnect,
			 * drop the reference that unix_peer(sk) was holding
			 * and fail the send.
			 */
			unix_peer(sk)=NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		/* Otherwise look the destination up again by name */
		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* Receive queue limit; skipped when the receiver is connected to us */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/*
		 * NOTE(review): neither path below unlocks 'other', so
		 * unix_wait_for_peer() is expected to return with other's
		 * state lock already released - confirm against the helper.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		/* 'other' is still referenced, so restart re-locks it directly */
		goto restart;
	}

	/* Deliver: queue the skb, notify the receiver, drop our reference */
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1468 
1469 
/*
 *	Stream send path.
 *
 *	The payload is cut into skbs, each at most about half the send
 *	buffer and at most SKB_MAX_ALLOC, and every skb is queued on the
 *	peer's receive queue as soon as it has been filled.
 *
 *	Returns the number of bytes queued if any were queued, otherwise a
 *	negative errno.  When nothing was sent, the peer-gone/shutdown case
 *	raises SIGPIPE unless MSG_NOSIGNAL is set, and fails with -EPIPE.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr=msg->msg_name;
	int err,size;
	struct sk_buff *skb;
	int sent=0;
	struct scm_cookie tmp_scm;

	/* Use an on-stack cookie when the caller did not supply one */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* A destination address is never accepted on a stream socket */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		/* Note: plain unix_peer(), no extra reference is taken here */
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while(sent < len)
	{
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);

		if (skb==NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/*
		 * unix_attach_fds() clears scm->fp, so the passed files
		 * travel with the first skb only.
		 */
		if (siocb->scm->fp)
			unix_attach_fds(siocb->scm, skb);

		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer closed or stopped receiving: this chunk cannot be queued */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent+=size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing at all was sent and the caller allows it */
	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE,current,0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* A partial send reports the byte count rather than the error */
	return sent ? : err;
}
1578 
1579 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1580 				  struct msghdr *msg, size_t len)
1581 {
1582 	int err;
1583 	struct sock *sk = sock->sk;
1584 
1585 	err = sock_error(sk);
1586 	if (err)
1587 		return err;
1588 
1589 	if (sk->sk_state != TCP_ESTABLISHED)
1590 		return -ENOTCONN;
1591 
1592 	if (msg->msg_namelen)
1593 		msg->msg_namelen = 0;
1594 
1595 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1596 }
1597 
1598 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1599 {
1600 	struct unix_sock *u = unix_sk(sk);
1601 
1602 	msg->msg_namelen = 0;
1603 	if (u->addr) {
1604 		msg->msg_namelen = u->addr->len;
1605 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1606 	}
1607 }
1608 
/*
 *	Receive one datagram.
 *
 *	Takes one skb from the receive queue under u->readlock, copies at
 *	most @size bytes to the caller (setting MSG_TRUNC when the datagram
 *	is longer) and hands credentials, security data and passed files to
 *	scm_recv().
 *
 *	Returns the number of bytes copied or a negative errno.  A SEQPACKET
 *	socket with RCV_SHUTDOWN set returns 0 instead of -EAGAIN.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* Default to "no source address"; unix_copy_addr() may override */
	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* We took a datagram: wake anyone sleeping on our peer_wait queue */
	wake_up_interruptible_sync(&u->peer_wait);

	/* skb->sk is the socket the skb was allocated on, i.e. the sender */
	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	/* Use a zeroed on-stack cookie when the caller did not supply one */
	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK))
	{
		/* Real read: the passed files move from the skb to the cookie */
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	}
	else
	{
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk,skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1694 
/*
 *	Sleep until data has arrived. But check for races..
 *
 *	Waits on sk->sk_sleep until the receive queue is non-empty, an
 *	error is pending, receive has been shut down, a signal is pending
 *	or the timeout has run out.  Returns the remaining timeout.
 */

static long unix_stream_data_wait(struct sock * sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		/* Get on the wait queue first, then test the conditions */
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* SOCK_ASYNC_WAITDATA is set only for the duration of the sleep */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		/* The state lock is dropped while sleeping and retaken after */
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_unlock(sk);
	return timeo;
}
1726 
1727 
1728 
/*
 *	Stream receive path.
 *
 *	Dequeues skbs one at a time and copies their data to the caller
 *	until @size bytes have been copied or the loop has to stop: queue
 *	empty with the low-water target met, pending error, receive
 *	shutdown, a different writer's credentials, passed files, MSG_PEEK,
 *	or a copy fault.  u->readlock is held while skbs are consumed and
 *	released while sleeping for more data.
 *
 *	Returns the number of bytes copied if non-zero, otherwise the error
 *	code (which is 0 at end of file).
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* target: how many bytes must be copied before we may return early */
	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	/* Use a zeroed on-stack cookie when the caller did not supply one */
	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do
	{
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb==NULL)
		{
			/* Queue empty: done if the low-water target is met */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			if ((err = sock_error(sk)) != 0)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Do not hold readlock while sleeping for data */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			/*
			 * NOTE(review): readlock is not held here, so jumping
			 * to 'out' is balanced, but this path also skips
			 * scm_recv() - confirm that is intended.
			 */
			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr)
		{
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy fault: requeue; -EFAULT only if nothing was copied */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK))
		{
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len)
			{
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop once the cookie carries passed files */
			if (siocb->scm->fp)
				break;
		}
		else
		{
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1876 
1877 static int unix_shutdown(struct socket *sock, int mode)
1878 {
1879 	struct sock *sk = sock->sk;
1880 	struct sock *other;
1881 
1882 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1883 
1884 	if (mode) {
1885 		unix_state_lock(sk);
1886 		sk->sk_shutdown |= mode;
1887 		other=unix_peer(sk);
1888 		if (other)
1889 			sock_hold(other);
1890 		unix_state_unlock(sk);
1891 		sk->sk_state_change(sk);
1892 
1893 		if (other &&
1894 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1895 
1896 			int peer_mode = 0;
1897 
1898 			if (mode&RCV_SHUTDOWN)
1899 				peer_mode |= SEND_SHUTDOWN;
1900 			if (mode&SEND_SHUTDOWN)
1901 				peer_mode |= RCV_SHUTDOWN;
1902 			unix_state_lock(other);
1903 			other->sk_shutdown |= peer_mode;
1904 			unix_state_unlock(other);
1905 			other->sk_state_change(other);
1906 			read_lock(&other->sk_callback_lock);
1907 			if (peer_mode == SHUTDOWN_MASK)
1908 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1909 			else if (peer_mode & RCV_SHUTDOWN)
1910 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1911 			read_unlock(&other->sk_callback_lock);
1912 		}
1913 		if (other)
1914 			sock_put(other);
1915 	}
1916 	return 0;
1917 }
1918 
1919 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1920 {
1921 	struct sock *sk = sock->sk;
1922 	long amount=0;
1923 	int err;
1924 
1925 	switch(cmd)
1926 	{
1927 		case SIOCOUTQ:
1928 			amount = atomic_read(&sk->sk_wmem_alloc);
1929 			err = put_user(amount, (int __user *)arg);
1930 			break;
1931 		case SIOCINQ:
1932 		{
1933 			struct sk_buff *skb;
1934 
1935 			if (sk->sk_state == TCP_LISTEN) {
1936 				err = -EINVAL;
1937 				break;
1938 			}
1939 
1940 			spin_lock(&sk->sk_receive_queue.lock);
1941 			if (sk->sk_type == SOCK_STREAM ||
1942 			    sk->sk_type == SOCK_SEQPACKET) {
1943 				skb_queue_walk(&sk->sk_receive_queue, skb)
1944 					amount += skb->len;
1945 			} else {
1946 				skb = skb_peek(&sk->sk_receive_queue);
1947 				if (skb)
1948 					amount=skb->len;
1949 			}
1950 			spin_unlock(&sk->sk_receive_queue.lock);
1951 			err = put_user(amount, (int __user *)arg);
1952 			break;
1953 		}
1954 
1955 		default:
1956 			err = -ENOIOCTLCMD;
1957 			break;
1958 	}
1959 	return err;
1960 }
1961 
1962 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1963 {
1964 	struct sock *sk = sock->sk;
1965 	unsigned int mask;
1966 
1967 	poll_wait(file, sk->sk_sleep, wait);
1968 	mask = 0;
1969 
1970 	/* exceptional events? */
1971 	if (sk->sk_err)
1972 		mask |= POLLERR;
1973 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1974 		mask |= POLLHUP;
1975 	if (sk->sk_shutdown & RCV_SHUTDOWN)
1976 		mask |= POLLRDHUP;
1977 
1978 	/* readable? */
1979 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1980 	    (sk->sk_shutdown & RCV_SHUTDOWN))
1981 		mask |= POLLIN | POLLRDNORM;
1982 
1983 	/* Connection-based need to check for termination and startup */
1984 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1985 		mask |= POLLHUP;
1986 
1987 	/*
1988 	 * we set writable also when the other side has shut down the
1989 	 * connection. This prevents stuck sockets.
1990 	 */
1991 	if (unix_writable(sk))
1992 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1993 
1994 	return mask;
1995 }
1996 
1997 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
1998 				    poll_table *wait)
1999 {
2000 	struct sock *sk = sock->sk, *other;
2001 	unsigned int mask, writable;
2002 
2003 	poll_wait(file, sk->sk_sleep, wait);
2004 	mask = 0;
2005 
2006 	/* exceptional events? */
2007 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2008 		mask |= POLLERR;
2009 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2010 		mask |= POLLRDHUP;
2011 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2012 		mask |= POLLHUP;
2013 
2014 	/* readable? */
2015 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2016 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2017 		mask |= POLLIN | POLLRDNORM;
2018 
2019 	/* Connection-based need to check for termination and startup */
2020 	if (sk->sk_type == SOCK_SEQPACKET) {
2021 		if (sk->sk_state == TCP_CLOSE)
2022 			mask |= POLLHUP;
2023 		/* connection hasn't started yet? */
2024 		if (sk->sk_state == TCP_SYN_SENT)
2025 			return mask;
2026 	}
2027 
2028 	/* writable? */
2029 	writable = unix_writable(sk);
2030 	if (writable) {
2031 		other = unix_peer_get(sk);
2032 		if (other) {
2033 			if (unix_peer(other) != sk) {
2034 				poll_wait(file, &unix_sk(other)->peer_wait,
2035 					  wait);
2036 				if (unix_recvq_full(other))
2037 					writable = 0;
2038 			}
2039 
2040 			sock_put(other);
2041 		}
2042 	}
2043 
2044 	if (writable)
2045 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2046 	else
2047 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2048 
2049 	return mask;
2050 }
2051 
2052 #ifdef CONFIG_PROC_FS
2053 static struct sock *first_unix_socket(int *i)
2054 {
2055 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2056 		if (!hlist_empty(&unix_socket_table[*i]))
2057 			return __sk_head(&unix_socket_table[*i]);
2058 	}
2059 	return NULL;
2060 }
2061 
2062 static struct sock *next_unix_socket(int *i, struct sock *s)
2063 {
2064 	struct sock *next = sk_next(s);
2065 	/* More in this chain? */
2066 	if (next)
2067 		return next;
2068 	/* Look for next non-empty chain. */
2069 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2070 		if (!hlist_empty(&unix_socket_table[*i]))
2071 			return __sk_head(&unix_socket_table[*i]);
2072 	}
2073 	return NULL;
2074 }
2075 
/*
 * Per-open iterator state for the "unix" seq_file; its size is handed to
 * seq_open_net() in unix_seq_open() and it is reached via seq->private.
 */
struct unix_iter_state {
	struct seq_net_private p;
	/* index into unix_socket_table, driven by first/next_unix_socket() */
	int i;
};
2080 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2081 {
2082 	struct unix_iter_state *iter = seq->private;
2083 	loff_t off = 0;
2084 	struct sock *s;
2085 
2086 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2087 		if (sock_net(s) != seq_file_net(seq))
2088 			continue;
2089 		if (off == pos)
2090 			return s;
2091 		++off;
2092 	}
2093 	return NULL;
2094 }
2095 
2096 
2097 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2098 	__acquires(unix_table_lock)
2099 {
2100 	spin_lock(&unix_table_lock);
2101 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2102 }
2103 
2104 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2105 {
2106 	struct unix_iter_state *iter = seq->private;
2107 	struct sock *sk = v;
2108 	++*pos;
2109 
2110 	if (v == SEQ_START_TOKEN)
2111 		sk = first_unix_socket(&iter->i);
2112 	else
2113 		sk = next_unix_socket(&iter->i, sk);
2114 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2115 		sk = next_unix_socket(&iter->i, sk);
2116 	return sk;
2117 }
2118 
/* seq_file ->stop: drop unix_table_lock, which unix_seq_start() took. */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2124 
2125 static int unix_seq_show(struct seq_file *seq, void *v)
2126 {
2127 
2128 	if (v == SEQ_START_TOKEN)
2129 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2130 			 "Inode Path\n");
2131 	else {
2132 		struct sock *s = v;
2133 		struct unix_sock *u = unix_sk(s);
2134 		unix_state_lock(s);
2135 
2136 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2137 			s,
2138 			atomic_read(&s->sk_refcnt),
2139 			0,
2140 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2141 			s->sk_type,
2142 			s->sk_socket ?
2143 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2144 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2145 			sock_i_ino(s));
2146 
2147 		if (u->addr) {
2148 			int i, len;
2149 			seq_putc(seq, ' ');
2150 
2151 			i = 0;
2152 			len = u->addr->len - sizeof(short);
2153 			if (!UNIX_ABSTRACT(s))
2154 				len--;
2155 			else {
2156 				seq_putc(seq, '@');
2157 				i++;
2158 			}
2159 			for ( ; i < len; i++)
2160 				seq_putc(seq, u->addr->name->sun_path[i]);
2161 		}
2162 		unix_state_unlock(s);
2163 		seq_putc(seq, '\n');
2164 	}
2165 
2166 	return 0;
2167 }
2168 
/* Iterator callbacks for the "unix" seq_file; passed to seq_open_net(). */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2175 
2176 
/*
 * ->open for the "unix" proc file: hand off to seq_open_net() with our
 * iterator callbacks and the size of struct unix_iter_state; its result is
 * returned unchanged.
 */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2182 
/*
 * File operations of the "unix" proc entry that unix_net_init() creates:
 * our own open, the generic seq_file read/llseek, and seq_release_net.
 */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2190 
2191 #endif
2192 
/*
 * PF_UNIX protocol family descriptor with unix_create() as its socket
 * constructor; registered with sock_register() in af_unix_init().
 */
static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2198 
2199 
2200 static int unix_net_init(struct net *net)
2201 {
2202 	int error = -ENOMEM;
2203 
2204 	net->unx.sysctl_max_dgram_qlen = 10;
2205 	if (unix_sysctl_register(net))
2206 		goto out;
2207 
2208 #ifdef CONFIG_PROC_FS
2209 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2210 		unix_sysctl_unregister(net);
2211 		goto out;
2212 	}
2213 #endif
2214 	error = 0;
2215 out:
2216 	return error;
2217 }
2218 
/*
 * Per-namespace teardown: unregister the sysctl table and remove the "unix"
 * proc entry.
 * NOTE(review): proc_net_remove() is called unconditionally although the
 * entry is only created under CONFIG_PROC_FS - presumably a no-op stub
 * otherwise; confirm.
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2224 
/*
 * Per-network-namespace hooks; registered with register_pernet_subsys()
 * in af_unix_init().
 */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2229 
2230 static int __init af_unix_init(void)
2231 {
2232 	int rc = -1;
2233 	struct sk_buff *dummy_skb;
2234 
2235 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2236 
2237 	rc = proto_register(&unix_proto, 1);
2238 	if (rc != 0) {
2239 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2240 		       __func__);
2241 		goto out;
2242 	}
2243 
2244 	sock_register(&unix_family_ops);
2245 	register_pernet_subsys(&unix_net_ops);
2246 out:
2247 	return rc;
2248 }
2249 
/*
 * Module exit: unregister the PF_UNIX family, the unix_proto protocol and
 * the per-namespace operations that af_unix_init() registered.
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2256 
2257 /* Earlier than device_initcall() so that other drivers invoking
2258    request_module() don't end up in a loop when modprobe tries
2259    to use a UNIX socket. But later than subsys_initcall() because
2260    we depend on stuff initialised there */
2261 fs_initcall(af_unix_init);
2262 module_exit(af_unix_exit);
2263 
2264 MODULE_LICENSE("GPL");
2265 MODULE_ALIAS_NETPROTO(PF_UNIX);
2266