xref: /openbmc/linux/net/unix/af_unix.c (revision 82ced6fd)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetosv	:	Repaired (I hope) bugs introduces
33  *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
/*
 * Hash table of AF_UNIX sockets. Slots below UNIX_HASH_SIZE hold bound
 * sockets; the one extra slot at index UNIX_HASH_SIZE is the list of
 * sockets that are not bound yet (see unix_sockets_unbound).
 */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Serialises every insertion, removal and lookup on unix_socket_table. */
static DEFINE_SPINLOCK(unix_table_lock);
/* Number of AF_UNIX socks currently allocated; limited in unix_create1(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Chain that newly created (still unbound) sockets are hashed onto. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/*
 * Non-zero when the bound address is an abstract name: unix_bind() stores
 * UNIX_HASH_SIZE in addr->hash for filesystem bindings. Dereferences
 * ->addr, so the socket must already be bound.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Copy the security id carried in the scm cookie into the skb's UNIXSID
 * slot.  Marked inline like the other three helpers in this block.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the security id stored in the skb's UNIXSID slot into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK there is no security id to transfer. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143 
/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock;
 *    each socket's state is protected by its own per-socket lock
 *    (initialised as a spinlock in unix_create1()).
 */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
/* Peer pointer of sk. Expands to an lvalue, so it is also used for assignment. */
#define unix_peer(sk) (unix_sk(sk)->peer)
159 
/* Return non-zero when osk's peer pointer refers to sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_osk = unix_peer(osk);

	return sk == peer_of_osk;
}
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
/*
 *	Check unix socket name:
 *		- it must not be zero length.
 *		- if it starts with a non-zero byte, it is a filesystem
 *		  name and is treated as NUL-terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */
199 
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len] = 0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220 	return len;
221 }
222 
/*
 * Unhash sk from the chain it currently sits on.  The callers in this
 * file invoke it with unix_table_lock held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227 
/*
 * Hash sk onto the given chain, warning if it is still hashed somewhere.
 * The callers in this file invoke it with unix_table_lock held.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
314 static void unix_write_space(struct sock *sk)
315 {
316 	read_lock(&sk->sk_callback_lock);
317 	if (unix_writable(sk)) {
318 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
319 			wake_up_interruptible_sync(sk->sk_sleep);
320 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321 	}
322 	read_unlock(&sk->sk_callback_lock);
323 }
324 
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332 		skb_queue_purge(&sk->sk_receive_queue);
333 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334 
335 		/* If one link of bidirectional dgram pipe is disconnected,
336 		 * we signal error. Messages are lost. Do not make this,
337 		 * when peer was not connected to us.
338 		 */
339 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340 			other->sk_err = ECONNRESET;
341 			other->sk_error_report(other);
342 		}
343 	}
344 }
345 
346 static void unix_sock_destructor(struct sock *sk)
347 {
348 	struct unix_sock *u = unix_sk(sk);
349 
350 	skb_queue_purge(&sk->sk_receive_queue);
351 
352 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353 	WARN_ON(!sk_unhashed(sk));
354 	WARN_ON(sk->sk_socket);
355 	if (!sock_flag(sk, SOCK_DEAD)) {
356 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357 		return;
358 	}
359 
360 	if (u->addr)
361 		unix_release_addr(u->addr);
362 
363 	atomic_dec(&unix_nr_socks);
364 	local_bh_disable();
365 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 	local_bh_enable();
367 #ifdef UNIX_REFCNT_DEBUG
368 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369 		atomic_read(&unix_nr_socks));
370 #endif
371 }
372 
/*
 * Tear down a unix sock.
 *
 * @sk:      sock being released.
 * @embrion: non-zero when sk is a not-yet-accepted connection that is
 *           being dropped together with its listener (see the recursive
 *           call below); it forces ECONNRESET on the peer.
 *
 * Unhashes the sock, marks it closed and orphaned, notifies and drops the
 * peer, flushes the receive queue, releases the filesystem binding and
 * finally drops the reference on sk.  Always returns 0.
 */
static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Take over the dentry/mnt so they can be put after the lock is dropped */
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	/* Remember the previous state: a listener needs extra cleanup below */
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data on our side (or an embryo) means data was lost */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/*
		 * On a listening socket each queued skb stands for a pending
		 * connection; release the sock attached to it as an embryo.
		 */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	/* Only run the collector when some fds are currently in flight */
	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}
451 
452 static int unix_listen(struct socket *sock, int backlog)
453 {
454 	int err;
455 	struct sock *sk = sock->sk;
456 	struct unix_sock *u = unix_sk(sk);
457 
458 	err = -EOPNOTSUPP;
459 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460 		goto out;	/* Only stream/seqpacket sockets accept */
461 	err = -EINVAL;
462 	if (!u->addr)
463 		goto out;	/* No listens on an unbound socket */
464 	unix_state_lock(sk);
465 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466 		goto out_unlock;
467 	if (backlog > sk->sk_max_ack_backlog)
468 		wake_up_interruptible_all(&u->peer_wait);
469 	sk->sk_max_ack_backlog	= backlog;
470 	sk->sk_state		= TCP_LISTEN;
471 	/* set credentials so connect can copy them */
472 	sk->sk_peercred.pid	= task_tgid_vnr(current);
473 	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474 	err = 0;
475 
476 out_unlock:
477 	unix_state_unlock(sk);
478 out:
479 	return err;
480 }
481 
/*
 * Forward declarations of the socket operations, needed because the
 * proto_ops tables below refer to them before they are defined.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
506 
/*
 * Operations for SOCK_STREAM sockets (selected in unix_create()).
 * Socket options, mmap and sendpage are not supported and use the
 * generic sock_no_* stubs.
 */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
527 
/*
 * Operations for SOCK_DGRAM sockets (also used for SOCK_RAW, which
 * unix_create() maps to SOCK_DGRAM).  Datagram sockets cannot listen or
 * accept, so those entries use the sock_no_* stubs.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
548 
/*
 * Operations for SOCK_SEQPACKET sockets.  Connection setup (connect,
 * accept, listen) is shared with the stream ops, while poll and recvmsg
 * are the datagram versions; sendmsg has its own seqpacket entry point.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
569 
/*
 * Protocol descriptor passed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated sock large enough to hold a struct unix_sock.
 */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
575 
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (applied with lockdep_set_class() in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583 
584 static struct sock *unix_create1(struct net *net, struct socket *sock)
585 {
586 	struct sock *sk = NULL;
587 	struct unix_sock *u;
588 
589 	atomic_inc(&unix_nr_socks);
590 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591 		goto out;
592 
593 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594 	if (!sk)
595 		goto out;
596 
597 	sock_init_data(sock, sk);
598 	lockdep_set_class(&sk->sk_receive_queue.lock,
599 				&af_unix_sk_receive_queue_lock_key);
600 
601 	sk->sk_write_space	= unix_write_space;
602 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603 	sk->sk_destruct		= unix_sock_destructor;
604 	u	  = unix_sk(sk);
605 	u->dentry = NULL;
606 	u->mnt	  = NULL;
607 	spin_lock_init(&u->lock);
608 	atomic_long_set(&u->inflight, 0);
609 	INIT_LIST_HEAD(&u->link);
610 	mutex_init(&u->readlock); /* single task reading lock */
611 	init_waitqueue_head(&u->peer_wait);
612 	unix_insert_socket(unix_sockets_unbound, sk);
613 out:
614 	if (sk == NULL)
615 		atomic_dec(&unix_nr_socks);
616 	else {
617 		local_bh_disable();
618 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619 		local_bh_enable();
620 	}
621 	return sk;
622 }
623 
624 static int unix_create(struct net *net, struct socket *sock, int protocol)
625 {
626 	if (protocol && protocol != PF_UNIX)
627 		return -EPROTONOSUPPORT;
628 
629 	sock->state = SS_UNCONNECTED;
630 
631 	switch (sock->type) {
632 	case SOCK_STREAM:
633 		sock->ops = &unix_stream_ops;
634 		break;
635 		/*
636 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
637 		 *	nothing uses it.
638 		 */
639 	case SOCK_RAW:
640 		sock->type = SOCK_DGRAM;
641 	case SOCK_DGRAM:
642 		sock->ops = &unix_dgram_ops;
643 		break;
644 	case SOCK_SEQPACKET:
645 		sock->ops = &unix_seqpacket_ops;
646 		break;
647 	default:
648 		return -ESOCKTNOSUPPORT;
649 	}
650 
651 	return unix_create1(net, sock) ? 0 : -ENOMEM;
652 }
653 
654 static int unix_release(struct socket *sock)
655 {
656 	struct sock *sk = sock->sk;
657 
658 	if (!sk)
659 		return 0;
660 
661 	sock->sk = NULL;
662 
663 	return unix_release_sock(sk, 0);
664 }
665 
666 static int unix_autobind(struct socket *sock)
667 {
668 	struct sock *sk = sock->sk;
669 	struct net *net = sock_net(sk);
670 	struct unix_sock *u = unix_sk(sk);
671 	static u32 ordernum = 1;
672 	struct unix_address *addr;
673 	int err;
674 
675 	mutex_lock(&u->readlock);
676 
677 	err = 0;
678 	if (u->addr)
679 		goto out;
680 
681 	err = -ENOMEM;
682 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683 	if (!addr)
684 		goto out;
685 
686 	addr->name->sun_family = AF_UNIX;
687 	atomic_set(&addr->refcnt, 1);
688 
689 retry:
690 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692 
693 	spin_lock(&unix_table_lock);
694 	ordernum = (ordernum+1)&0xFFFFF;
695 
696 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697 				      addr->hash)) {
698 		spin_unlock(&unix_table_lock);
699 		/* Sanity yield. It is unusual case, but yet... */
700 		if (!(ordernum&0xFF))
701 			yield();
702 		goto retry;
703 	}
704 	addr->hash ^= sk->sk_type;
705 
706 	__unix_remove_socket(sk);
707 	u->addr = addr;
708 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
709 	spin_unlock(&unix_table_lock);
710 	err = 0;
711 
712 out:	mutex_unlock(&u->readlock);
713 	return err;
714 }
715 
/*
 * Resolve a socket address to the sock bound to it.
 *
 * @net:     namespace to search.
 * @sunname: address; a non-zero first path byte selects a filesystem
 *           lookup, a zero byte an abstract-name lookup.
 * @len:     address length (used for abstract names).
 * @type:    required socket type.
 * @hash:    name hash (used for abstract names).
 * @error:   receives a negative errno when NULL is returned.
 *
 * Returns the sock with a reference held (taken by the lookup helpers),
 * or NULL with *error set.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		/* The caller needs write permission on the socket inode */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		/* Only a lookup that will succeed updates the access time */
		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			/* path already released above, so skip put_fail */
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
770 
771 
/*
 * bind() socket operation.
 *
 * An address consisting only of the family (addr_len == sizeof(short))
 * requests an autobind.  Otherwise the address is validated by
 * unix_mkname(); a filesystem name gets a socket inode created via
 * vfs_mknod(), an abstract name is checked for uniqueness in the hash
 * table.  On success the sock is rehashed onto the chain matching its new
 * address.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a bad address or
 * an already-bound socket, -ENOMEM, -EADDRINUSE when the name exists
 * (including -EEXIST from the filesystem), or the error from the path
 * lookup / mknod steps.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	/* unix_mkname() returns the normalised length on success */
	addr_len = err;

	mutex_lock(&u->readlock);

	/* A socket can only be bound once */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		/* Reached both on success and on security_path_mknod() failure */
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		/* Swap the parent dentry in nd.path for the new socket dentry */
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		/* Marks the address as a filesystem binding (see UNIX_ABSTRACT) */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/* Filesystem sockets are hashed by inode number */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	/* Move the sock from its current chain to the one for its address */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

	/* Error unwinding for the filesystem path, in reverse order of setup */
out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
894 
/*
 * Take the state locks of two socks.  When both are distinct and sk2 is
 * non-NULL the lower address is locked first so that concurrent callers
 * agree on the order; otherwise only sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
909 
/*
 * Counterpart of unix_state_double_lock(): always unlocks sk1, and also
 * sk2 when it is non-NULL and different from sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (likely(sk1 != sk2) && sk2)
		unix_state_unlock(sk2);
}
919 
/*
 * connect() for datagram sockets: set, change or (with AF_UNSPEC) clear
 * the default peer.
 *
 * Returns 0 on success; otherwise the error from unix_mkname(),
 * unix_autobind(), unix_find_other() or the security hook, or -EPERM when
 * the target is connected to some other socket.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* With SOCK_PASSCRED set an unbound socket is autobound first */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		/* On success we hold a reference on other */
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		/* With other == NULL this locks sk only */
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		/* The reference from unix_find_other() now belongs to the peer pointer */
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
993 
994 static long unix_wait_for_peer(struct sock *other, long timeo)
995 {
996 	struct unix_sock *u = unix_sk(other);
997 	int sched;
998 	DEFINE_WAIT(wait);
999 
1000 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001 
1002 	sched = !sock_flag(other, SOCK_DEAD) &&
1003 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1004 		unix_recvq_full(other);
1005 
1006 	unix_state_unlock(other);
1007 
1008 	if (sched)
1009 		timeo = schedule_timeout(timeo);
1010 
1011 	finish_wait(&u->peer_wait, &wait);
1012 	return timeo;
1013 }
1014 
/*
 *	Connect a connection-oriented socket to the listener named by @uaddr.
 *
 *	Everything that can fail on allocation is obtained before any lock is
 *	taken: a fresh sock (newsk) that becomes the server-side endpoint, and
 *	an skb obtained with sock_wmalloc(newsk, ...) that is queued on the
 *	listener's receive queue to announce the connection (unix_accept()
 *	later dequeues it and uses skb->sk as the accepted sock).
 *
 *	Lock order is the listener ("other") first, then sk via
 *	unix_state_lock_nested().  sk->sk_state is sampled before sk is locked
 *	and rechecked afterwards; if it changed, both locks and the listener
 *	reference are dropped and the lookup restarts.
 *
 *	Returns 0 on success.  On failure returns a negative errno: whatever
 *	unix_mkname(), unix_autobind(), unix_find_other() or
 *	security_unix_stream_connect() reported, -ENOMEM if newsk or the skb
 *	could not be allocated, -ECONNREFUSED if the target is not listening,
 *	-EAGAIN if its queue is full and the send timeout is zero,
 *	sock_intr_errno() if a signal is pending after waiting, -EISCONN if sk
 *	is already established, or -EINVAL for any other state of sk.
 */
1015 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1016 			       int addr_len, int flags)
1017 {
1018 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1019 	struct sock *sk = sock->sk;
1020 	struct net *net = sock_net(sk);
1021 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1022 	struct sock *newsk = NULL;
1023 	struct sock *other = NULL;
1024 	struct sk_buff *skb = NULL;
1025 	unsigned hash;
1026 	int st;
1027 	int err;
1028 	long timeo;
1029 
1030 	err = unix_mkname(sunaddr, addr_len, &hash);
1031 	if (err < 0)
1032 		goto out;
	/* A non-negative result of unix_mkname() is used as the address length. */
1033 	addr_len = err;
1034 
	/* With SOCK_PASSCRED set, an unbound caller is autobound before connecting. */
1035 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1036 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1037 		goto out;
1038 
1039 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1040 
1041 	/* First of all allocate resources.
1042 	   If we will make it after state is locked,
1043 	   we will have to recheck all again in any case.
1044 	 */
1045 
1046 	err = -ENOMEM;
1047 
1048 	/* create new sock for complete connection */
1049 	newsk = unix_create1(sock_net(sk), NULL);
1050 	if (newsk == NULL)
1051 		goto out;
1052 
1053 	/* Allocate skb for sending to listening sock */
1054 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1055 	if (skb == NULL)
1056 		goto out;
1057 
1058 restart:
1059 	/*  Find listening sock. */
1060 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1061 	if (!other)
1062 		goto out;
1063 
1064 	/* Latch state of peer */
1065 	unix_state_lock(other);
1066 
1067 	/* Apparently VFS overslept socket death. Retry. */
1068 	if (sock_flag(other, SOCK_DEAD)) {
1069 		unix_state_unlock(other);
1070 		sock_put(other);
1071 		goto restart;
1072 	}
1073 
1074 	err = -ECONNREFUSED;
1075 	if (other->sk_state != TCP_LISTEN)
1076 		goto out_unlock;
1077 
	/*
	 * Listener's queue is full: fail at once with -EAGAIN when the
	 * timeout is zero, otherwise wait and look the listener up again.
	 * NOTE(review): neither path after unix_wait_for_peer() unlocks
	 * other, so that helper presumably releases other's state lock --
	 * confirm against its definition.
	 */
1078 	if (unix_recvq_full(other)) {
1079 		err = -EAGAIN;
1080 		if (!timeo)
1081 			goto out_unlock;
1082 
1083 		timeo = unix_wait_for_peer(other, timeo);
1084 
1085 		err = sock_intr_errno(timeo);
1086 		if (signal_pending(current))
1087 			goto out;
1088 		sock_put(other);
1089 		goto restart;
1090 	}
1091 
1092 	/* Latch our state.
1093 
1094 	   It is tricky place. We need to grab write lock and cannot
1095 	   drop lock on peer. It is dangerous because deadlock is
1096 	   possible. Connect to self case and simultaneous
1097 	   attempt to connect are eliminated by checking socket
1098 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1099 	   check this before attempt to grab lock.
1100 
1101 	   Well, and we have to recheck the state after socket locked.
1102 	 */
1103 	st = sk->sk_state;
1104 
1105 	switch (st) {
1106 	case TCP_CLOSE:
1107 		/* This is ok... continue with connect */
1108 		break;
1109 	case TCP_ESTABLISHED:
1110 		/* Socket is already connected */
1111 		err = -EISCONN;
1112 		goto out_unlock;
1113 	default:
1114 		err = -EINVAL;
1115 		goto out_unlock;
1116 	}
1117 
1118 	unix_state_lock_nested(sk);
1119 
	/* State changed between the unlocked sample and taking the lock: start over. */
1120 	if (sk->sk_state != st) {
1121 		unix_state_unlock(sk);
1122 		unix_state_unlock(other);
1123 		sock_put(other);
1124 		goto restart;
1125 	}
1126 
1127 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1128 	if (err) {
1129 		unix_state_unlock(sk);
1130 		goto out_unlock;
1131 	}
1132 
1133 	/* The way is open! Fastly set all the necessary fields... */
1134 
	/* newsk's peer pointer to sk is backed by this reference. */
1135 	sock_hold(sk);
1136 	unix_peer(newsk)	= sk;
1137 	newsk->sk_state		= TCP_ESTABLISHED;
1138 	newsk->sk_type		= sk->sk_type;
	/* newsk records the connecting task's pid/euid/egid as its peer credentials. */
1139 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1140 	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
1141 	newu = unix_sk(newsk);
1142 	newsk->sk_sleep		= &newu->peer_wait;
1143 	otheru = unix_sk(other);
1144 
1145 	/* copy address information from listening to new sock*/
1146 	if (otheru->addr) {
1147 		atomic_inc(&otheru->addr->refcnt);
1148 		newu->addr = otheru->addr;
1149 	}
1150 	if (otheru->dentry) {
1151 		newu->dentry	= dget(otheru->dentry);
1152 		newu->mnt	= mntget(otheru->mnt);
1153 	}
1154 
1155 	/* Set credentials */
1156 	sk->sk_peercred = other->sk_peercred;
1157 
1158 	sock->state	= SS_CONNECTED;
1159 	sk->sk_state	= TCP_ESTABLISHED;
	/* sk's peer pointer to newsk is backed by this reference. */
1160 	sock_hold(newsk);
1161 
1162 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1163 	unix_peer(sk)	= newsk;
1164 
1165 	unix_state_unlock(sk);
1166 
1167 	/* take ten and send info to listening sock */
1168 	spin_lock(&other->sk_receive_queue.lock);
1169 	__skb_queue_tail(&other->sk_receive_queue, skb);
1170 	spin_unlock(&other->sk_receive_queue.lock);
1171 	unix_state_unlock(other);
1172 	other->sk_data_ready(other, 0);
1173 	sock_put(other);
1174 	return 0;
1175 
	/* Error exit with other's state lock still held. */
1176 out_unlock:
1177 	if (other)
1178 		unix_state_unlock(other);
1179 
	/* Common error exit: free the preallocated skb and sock, drop the listener ref. */
1180 out:
1181 	kfree_skb(skb);
1182 	if (newsk)
1183 		unix_release_sock(newsk, 0);
1184 	if (other)
1185 		sock_put(other);
1186 	return err;
1187 }
1188 
1189 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1190 {
1191 	struct sock *ska = socka->sk, *skb = sockb->sk;
1192 
1193 	/* Join our sockets back to back */
1194 	sock_hold(ska);
1195 	sock_hold(skb);
1196 	unix_peer(ska) = skb;
1197 	unix_peer(skb) = ska;
1198 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1199 	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1200 	ska->sk_peercred.uid = skb->sk_peercred.uid;
1201 	ska->sk_peercred.gid = skb->sk_peercred.gid;
1202 
1203 	if (ska->sk_type != SOCK_DGRAM) {
1204 		ska->sk_state = TCP_ESTABLISHED;
1205 		skb->sk_state = TCP_ESTABLISHED;
1206 		socka->state  = SS_CONNECTED;
1207 		sockb->state  = SS_CONNECTED;
1208 	}
1209 	return 0;
1210 }
1211 
1212 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1213 {
1214 	struct sock *sk = sock->sk;
1215 	struct sock *tsk;
1216 	struct sk_buff *skb;
1217 	int err;
1218 
1219 	err = -EOPNOTSUPP;
1220 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1221 		goto out;
1222 
1223 	err = -EINVAL;
1224 	if (sk->sk_state != TCP_LISTEN)
1225 		goto out;
1226 
1227 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1228 	 * so that no locks are necessary.
1229 	 */
1230 
1231 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1232 	if (!skb) {
1233 		/* This means receive shutdown. */
1234 		if (err == 0)
1235 			err = -EINVAL;
1236 		goto out;
1237 	}
1238 
1239 	tsk = skb->sk;
1240 	skb_free_datagram(sk, skb);
1241 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1242 
1243 	/* attach accepted sock to socket */
1244 	unix_state_lock(tsk);
1245 	newsock->state = SS_CONNECTED;
1246 	sock_graft(tsk, newsock);
1247 	unix_state_unlock(tsk);
1248 	return 0;
1249 
1250 out:
1251 	return err;
1252 }
1253 
1254 
1255 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1256 {
1257 	struct sock *sk = sock->sk;
1258 	struct unix_sock *u;
1259 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1260 	int err = 0;
1261 
1262 	if (peer) {
1263 		sk = unix_peer_get(sk);
1264 
1265 		err = -ENOTCONN;
1266 		if (!sk)
1267 			goto out;
1268 		err = 0;
1269 	} else {
1270 		sock_hold(sk);
1271 	}
1272 
1273 	u = unix_sk(sk);
1274 	unix_state_lock(sk);
1275 	if (!u->addr) {
1276 		sunaddr->sun_family = AF_UNIX;
1277 		sunaddr->sun_path[0] = 0;
1278 		*uaddr_len = sizeof(short);
1279 	} else {
1280 		struct unix_address *addr = u->addr;
1281 
1282 		*uaddr_len = addr->len;
1283 		memcpy(sunaddr, addr->name, *uaddr_len);
1284 	}
1285 	unix_state_unlock(sk);
1286 	sock_put(sk);
1287 out:
1288 	return err;
1289 }
1290 
1291 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1292 {
1293 	int i;
1294 
1295 	scm->fp = UNIXCB(skb).fp;
1296 	skb->destructor = sock_wfree;
1297 	UNIXCB(skb).fp = NULL;
1298 
1299 	for (i = scm->fp->count-1; i >= 0; i--)
1300 		unix_notinflight(scm->fp->fp[i]);
1301 }
1302 
1303 static void unix_destruct_fds(struct sk_buff *skb)
1304 {
1305 	struct scm_cookie scm;
1306 	memset(&scm, 0, sizeof(scm));
1307 	unix_detach_fds(&scm, skb);
1308 
1309 	/* Alas, it calls VFS */
1310 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1311 	scm_destroy(&scm);
1312 	sock_wfree(skb);
1313 }
1314 
1315 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1316 {
1317 	int i;
1318 
1319 	/*
1320 	 * Need to duplicate file references for the sake of garbage
1321 	 * collection.  Otherwise a socket in the fps might become a
1322 	 * candidate for GC while the skb is not yet queued.
1323 	 */
1324 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1325 	if (!UNIXCB(skb).fp)
1326 		return -ENOMEM;
1327 
1328 	for (i = scm->fp->count-1; i >= 0; i--)
1329 		unix_inflight(scm->fp->fp[i]);
1330 	skb->destructor = unix_destruct_fds;
1331 	return 0;
1332 }
1333 
1334 /*
1335  *	Send AF_UNIX data.
1336  */
1337 
/*
 *	Queue one message of @len bytes on the destination's receive queue.
 *	unix_seqpacket_sendmsg() also funnels into this function.
 *
 *	The destination is the address in @msg when msg_namelen is non-zero,
 *	otherwise the connected peer.  The skb is fully built (credentials,
 *	passed fds, security data, payload) before the destination is locked.
 *
 *	Returns @len on success or a negative errno: -EOPNOTSUPP for MSG_OOB,
 *	-ENOTCONN with no address and no peer, -EMSGSIZE when @len exceeds
 *	sk_sndbuf - 32, -EPERM when unix_may_send() refuses, -ECONNREFUSED
 *	when the connected peer turned out to be dead, -ECONNRESET when a
 *	retry is needed but no address was supplied, -EPIPE when the receiver
 *	has RCV_SHUTDOWN set, -EAGAIN when its queue is full and the timeout
 *	is zero, sock_intr_errno() when a signal is pending after waiting, or
 *	an error from scm_send(), unix_mkname(), unix_autobind(),
 *	sock_alloc_send_skb(), unix_attach_fds(), memcpy_fromiovec(),
 *	unix_find_other() or security_unix_may_send().
 *	siocb->scm is destroyed on every exit taken after scm_send() succeeds.
 */
1338 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1339 			      struct msghdr *msg, size_t len)
1340 {
1341 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1342 	struct sock *sk = sock->sk;
1343 	struct net *net = sock_net(sk);
1344 	struct unix_sock *u = unix_sk(sk);
1345 	struct sockaddr_un *sunaddr = msg->msg_name;
1346 	struct sock *other = NULL;
1347 	int namelen = 0; /* fake GCC */
1348 	int err;
1349 	unsigned hash;
1350 	struct sk_buff *skb;
1351 	long timeo;
1352 	struct scm_cookie tmp_scm;
1353 
	/* Use an on-stack cookie when the caller supplied none. */
1354 	if (NULL == siocb->scm)
1355 		siocb->scm = &tmp_scm;
1356 	wait_for_unix_gc();
1357 	err = scm_send(sock, msg, siocb->scm);
1358 	if (err < 0)
1359 		return err;
1360 
1361 	err = -EOPNOTSUPP;
1362 	if (msg->msg_flags&MSG_OOB)
1363 		goto out;
1364 
	/*
	 * Explicit address: only validate it here, the lookup happens at
	 * restart.  No address: take a reference on the connected peer.
	 */
1365 	if (msg->msg_namelen) {
1366 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1367 		if (err < 0)
1368 			goto out;
1369 		namelen = err;
1370 	} else {
1371 		sunaddr = NULL;
1372 		err = -ENOTCONN;
1373 		other = unix_peer_get(sk);
1374 		if (!other)
1375 			goto out;
1376 	}
1377 
	/* With SOCK_PASSCRED set, an unbound sender is autobound first. */
1378 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1379 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1380 		goto out;
1381 
1382 	err = -EMSGSIZE;
1383 	if (len > sk->sk_sndbuf - 32)
1384 		goto out;
1385 
1386 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1387 	if (skb == NULL)
1388 		goto out;
1389 
1390 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1391 	if (siocb->scm->fp) {
1392 		err = unix_attach_fds(siocb->scm, skb);
1393 		if (err)
1394 			goto out_free;
1395 	}
1396 	unix_get_secdata(siocb->scm, skb);
1397 
1398 	skb_reset_transport_header(skb);
1399 	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1400 	if (err)
1401 		goto out_free;
1402 
1403 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1404 
1405 restart:
	/* No destination held (first pass with an address, or the old one died). */
1406 	if (!other) {
1407 		err = -ECONNRESET;
1408 		if (sunaddr == NULL)
1409 			goto out_free;
1410 
1411 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1412 					hash, &err);
1413 		if (other == NULL)
1414 			goto out_free;
1415 	}
1416 
1417 	unix_state_lock(other);
1418 	err = -EPERM;
1419 	if (!unix_may_send(sk, other))
1420 		goto out_unlock;
1421 
1422 	if (sock_flag(other, SOCK_DEAD)) {
1423 		/*
1424 		 *	Check with 1003.1g - what should
1425 		 *	datagram error
1426 		 */
1427 		unix_state_unlock(other);
1428 		sock_put(other);
1429 
		/*
		 * If the dead sock is still our connected peer, disconnect
		 * from it (dropping the peer reference too) and fail with
		 * -ECONNREFUSED; otherwise retry the lookup from scratch.
		 */
1430 		err = 0;
1431 		unix_state_lock(sk);
1432 		if (unix_peer(sk) == other) {
1433 			unix_peer(sk) = NULL;
1434 			unix_state_unlock(sk);
1435 
1436 			unix_dgram_disconnected(sk, other);
1437 			sock_put(other);
1438 			err = -ECONNREFUSED;
1439 		} else {
1440 			unix_state_unlock(sk);
1441 		}
1442 
1443 		other = NULL;
1444 		if (err)
1445 			goto out_free;
1446 		goto restart;
1447 	}
1448 
1449 	err = -EPIPE;
1450 	if (other->sk_shutdown & RCV_SHUTDOWN)
1451 		goto out_unlock;
1452 
1453 	if (sk->sk_type != SOCK_SEQPACKET) {
1454 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1455 		if (err)
1456 			goto out_unlock;
1457 	}
1458 
	/*
	 * The queue-full check is skipped when the receiver is connected
	 * back to us.
	 * NOTE(review): the path after unix_wait_for_peer() does not unlock
	 * other, so that helper presumably releases other's state lock --
	 * confirm against its definition.
	 */
1459 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1460 		if (!timeo) {
1461 			err = -EAGAIN;
1462 			goto out_unlock;
1463 		}
1464 
1465 		timeo = unix_wait_for_peer(other, timeo);
1466 
1467 		err = sock_intr_errno(timeo);
1468 		if (signal_pending(current))
1469 			goto out_free;
1470 
1471 		goto restart;
1472 	}
1473 
1474 	skb_queue_tail(&other->sk_receive_queue, skb);
1475 	unix_state_unlock(other);
1476 	other->sk_data_ready(other, len);
1477 	sock_put(other);
1478 	scm_destroy(siocb->scm);
1479 	return len;
1480 
	/* Error exits, from most to least state held. */
1481 out_unlock:
1482 	unix_state_unlock(other);
1483 out_free:
1484 	kfree_skb(skb);
1485 out:
1486 	if (other)
1487 		sock_put(other);
1488 	scm_destroy(siocb->scm);
1489 	return err;
1490 }
1491 
1492 
1493 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1494 			       struct msghdr *msg, size_t len)
1495 {
1496 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1497 	struct sock *sk = sock->sk;
1498 	struct sock *other = NULL;
1499 	struct sockaddr_un *sunaddr = msg->msg_name;
1500 	int err, size;
1501 	struct sk_buff *skb;
1502 	int sent = 0;
1503 	struct scm_cookie tmp_scm;
1504 
1505 	if (NULL == siocb->scm)
1506 		siocb->scm = &tmp_scm;
1507 	wait_for_unix_gc();
1508 	err = scm_send(sock, msg, siocb->scm);
1509 	if (err < 0)
1510 		return err;
1511 
1512 	err = -EOPNOTSUPP;
1513 	if (msg->msg_flags&MSG_OOB)
1514 		goto out_err;
1515 
1516 	if (msg->msg_namelen) {
1517 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1518 		goto out_err;
1519 	} else {
1520 		sunaddr = NULL;
1521 		err = -ENOTCONN;
1522 		other = unix_peer(sk);
1523 		if (!other)
1524 			goto out_err;
1525 	}
1526 
1527 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1528 		goto pipe_err;
1529 
1530 	while (sent < len) {
1531 		/*
1532 		 *	Optimisation for the fact that under 0.01% of X
1533 		 *	messages typically need breaking up.
1534 		 */
1535 
1536 		size = len-sent;
1537 
1538 		/* Keep two messages in the pipe so it schedules better */
1539 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1540 			size = (sk->sk_sndbuf >> 1) - 64;
1541 
1542 		if (size > SKB_MAX_ALLOC)
1543 			size = SKB_MAX_ALLOC;
1544 
1545 		/*
1546 		 *	Grab a buffer
1547 		 */
1548 
1549 		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1550 					  &err);
1551 
1552 		if (skb == NULL)
1553 			goto out_err;
1554 
1555 		/*
1556 		 *	If you pass two values to the sock_alloc_send_skb
1557 		 *	it tries to grab the large buffer with GFP_NOFS
1558 		 *	(which can fail easily), and if it fails grab the
1559 		 *	fallback size buffer which is under a page and will
1560 		 *	succeed. [Alan]
1561 		 */
1562 		size = min_t(int, size, skb_tailroom(skb));
1563 
1564 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1565 		if (siocb->scm->fp) {
1566 			err = unix_attach_fds(siocb->scm, skb);
1567 			if (err) {
1568 				kfree_skb(skb);
1569 				goto out_err;
1570 			}
1571 		}
1572 
1573 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1574 		if (err) {
1575 			kfree_skb(skb);
1576 			goto out_err;
1577 		}
1578 
1579 		unix_state_lock(other);
1580 
1581 		if (sock_flag(other, SOCK_DEAD) ||
1582 		    (other->sk_shutdown & RCV_SHUTDOWN))
1583 			goto pipe_err_free;
1584 
1585 		skb_queue_tail(&other->sk_receive_queue, skb);
1586 		unix_state_unlock(other);
1587 		other->sk_data_ready(other, size);
1588 		sent += size;
1589 	}
1590 
1591 	scm_destroy(siocb->scm);
1592 	siocb->scm = NULL;
1593 
1594 	return sent;
1595 
1596 pipe_err_free:
1597 	unix_state_unlock(other);
1598 	kfree_skb(skb);
1599 pipe_err:
1600 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1601 		send_sig(SIGPIPE, current, 0);
1602 	err = -EPIPE;
1603 out_err:
1604 	scm_destroy(siocb->scm);
1605 	siocb->scm = NULL;
1606 	return sent ? : err;
1607 }
1608 
1609 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1610 				  struct msghdr *msg, size_t len)
1611 {
1612 	int err;
1613 	struct sock *sk = sock->sk;
1614 
1615 	err = sock_error(sk);
1616 	if (err)
1617 		return err;
1618 
1619 	if (sk->sk_state != TCP_ESTABLISHED)
1620 		return -ENOTCONN;
1621 
1622 	if (msg->msg_namelen)
1623 		msg->msg_namelen = 0;
1624 
1625 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1626 }
1627 
1628 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1629 {
1630 	struct unix_sock *u = unix_sk(sk);
1631 
1632 	msg->msg_namelen = 0;
1633 	if (u->addr) {
1634 		msg->msg_namelen = u->addr->len;
1635 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1636 	}
1637 }
1638 
/*
 *	Receive one queued message; the SOCK_SEQPACKET check below shows
 *	this path also serves seqpacket sockets.
 *
 *	Runs under u->readlock.  At most @size bytes are copied to the
 *	caller; when the message is longer, MSG_TRUNC is set in
 *	msg->msg_flags.  The sender's address is copied out if the caller
 *	supplied msg_name, and credentials, security data and passed fds are
 *	delivered through siocb->scm (fds are duplicated instead of detached
 *	for MSG_PEEK).
 *
 *	Returns the number of bytes copied, or a negative errno:
 *	-EOPNOTSUPP for MSG_OOB, the error from skb_recv_datagram() (with
 *	-EAGAIN turned into 0 on a SOCK_SEQPACKET socket that has
 *	RCV_SHUTDOWN set), or the error from skb_copy_datagram_iovec().
 */
1639 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1640 			      struct msghdr *msg, size_t size,
1641 			      int flags)
1642 {
1643 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1644 	struct scm_cookie tmp_scm;
1645 	struct sock *sk = sock->sk;
1646 	struct unix_sock *u = unix_sk(sk);
1647 	int noblock = flags & MSG_DONTWAIT;
1648 	struct sk_buff *skb;
1649 	int err;
1650 
1651 	err = -EOPNOTSUPP;
1652 	if (flags&MSG_OOB)
1653 		goto out;
1654 
1655 	msg->msg_namelen = 0;
1656 
1657 	mutex_lock(&u->readlock);
1658 
1659 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1660 	if (!skb) {
1661 		unix_state_lock(sk);
1662 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1663 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1664 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1665 			err = 0;
1666 		unix_state_unlock(sk);
1667 		goto out_unlock;
1668 	}
1669 
	/* A message left the queue: wake tasks sleeping on our peer_wait. */
1670 	wake_up_interruptible_sync(&u->peer_wait);
1671 
1672 	if (msg->msg_name)
1673 		unix_copy_addr(msg, skb->sk);
1674 
	/* Clamp to the message length; flag truncation if the buffer is short. */
1675 	if (size > skb->len)
1676 		size = skb->len;
1677 	else if (size < skb->len)
1678 		msg->msg_flags |= MSG_TRUNC;
1679 
1680 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1681 	if (err)
1682 		goto out_free;
1683 
	/* Use a zeroed on-stack cookie when the caller supplied none. */
1684 	if (!siocb->scm) {
1685 		siocb->scm = &tmp_scm;
1686 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1687 	}
1688 	siocb->scm->creds = *UNIXCREDS(skb);
1689 	unix_set_secdata(siocb->scm, skb);
1690 
1691 	if (!(flags & MSG_PEEK)) {
1692 		if (UNIXCB(skb).fp)
1693 			unix_detach_fds(siocb->scm, skb);
1694 	} else {
1695 		/* It is questionable: on PEEK we could:
1696 		   - do not return fds - good, but too simple 8)
1697 		   - return fds, and do not return them on read (old strategy,
1698 		     apparently wrong)
1699 		   - clone fds (I chose it for now, it is the most universal
1700 		     solution)
1701 
1702 		   POSIX 1003.1g does not actually define this clearly
1703 		   at all. POSIX 1003.1g doesn't define a lot of things
1704 		   clearly however!
1705 
1706 		*/
1707 		if (UNIXCB(skb).fp)
1708 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1709 	}
1710 	err = size;
1711 
1712 	scm_recv(sock, msg, siocb->scm, flags);
1713 
	/* Success falls through here too: the skb is released in every case. */
1714 out_free:
1715 	skb_free_datagram(sk, skb);
1716 out_unlock:
1717 	mutex_unlock(&u->readlock);
1718 out:
1719 	return err;
1720 }
1721 
1722 /*
1723  *	Sleep until data has arrived, but check for races.
1724  */
1725 
/*
 *	Wait on sk->sk_sleep until a reader has a reason to look again.
 *
 *	The loop ends when the receive queue is non-empty, sk_err is set,
 *	RCV_SHUTDOWN is set, a signal is pending, or @timeo has run out.
 *	The conditions are tested with the state lock held and after
 *	prepare_to_wait(); the lock is dropped only around
 *	schedule_timeout().  SOCK_ASYNC_WAITDATA is set on the socket for
 *	the duration of each sleep.
 *
 *	Returns the remaining timeout.
 */
1726 static long unix_stream_data_wait(struct sock *sk, long timeo)
1727 {
1728 	DEFINE_WAIT(wait);
1729 
1730 	unix_state_lock(sk);
1731 
1732 	for (;;) {
1733 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1734 
1735 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1736 		    sk->sk_err ||
1737 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1738 		    signal_pending(current) ||
1739 		    !timeo)
1740 			break;
1741 
1742 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1743 		unix_state_unlock(sk);
1744 		timeo = schedule_timeout(timeo);
1745 		unix_state_lock(sk);
1746 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1747 	}
1748 
1749 	finish_wait(sk->sk_sleep, &wait);
1750 	unix_state_unlock(sk);
1751 	return timeo;
1752 }
1753 
1754 
1755 
/*
 *	Receive from a connected stream socket.
 *
 *	skbs are taken off the receive queue one at a time under
 *	u->readlock and copied to the caller until @size is exhausted.  The
 *	loop also stops early when the next skb carries different
 *	credentials than the first one, when passed fds have been
 *	collected, when an skb was only partly consumed (the remainder is
 *	put back at the head of the queue), after the first skb for
 *	MSG_PEEK, or when copying to user space fails.  With an empty queue
 *	it returns once at least "target" bytes (from sock_rcvlowat()) were
 *	copied, on a socket error or RCV_SHUTDOWN, or when the receive
 *	timeout is zero; otherwise it sleeps in unix_stream_data_wait().
 *
 *	Returns the number of bytes copied if non-zero (-EFAULT if the very
 *	first copy failed), otherwise err: -EINVAL when not established,
 *	-EOPNOTSUPP for MSG_OOB, the pending socket error, -EAGAIN on zero
 *	timeout, sock_intr_errno() after a signal, or 0 at RCV_SHUTDOWN.
 */
1756 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1757 			       struct msghdr *msg, size_t size,
1758 			       int flags)
1759 {
1760 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1761 	struct scm_cookie tmp_scm;
1762 	struct sock *sk = sock->sk;
1763 	struct unix_sock *u = unix_sk(sk);
1764 	struct sockaddr_un *sunaddr = msg->msg_name;
1765 	int copied = 0;
1766 	int check_creds = 0;
1767 	int target;
1768 	int err = 0;
1769 	long timeo;
1770 
1771 	err = -EINVAL;
1772 	if (sk->sk_state != TCP_ESTABLISHED)
1773 		goto out;
1774 
1775 	err = -EOPNOTSUPP;
1776 	if (flags&MSG_OOB)
1777 		goto out;
1778 
1779 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1780 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1781 
1782 	msg->msg_namelen = 0;
1783 
1784 	/* Lock the socket to prevent queue disordering
1785 	 * while sleeps in memcpy_tomsg
1786 	 */
1787 
	/* Use a zeroed on-stack cookie when the caller supplied none. */
1788 	if (!siocb->scm) {
1789 		siocb->scm = &tmp_scm;
1790 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1791 	}
1792 
1793 	mutex_lock(&u->readlock);
1794 
1795 	do {
1796 		int chunk;
1797 		struct sk_buff *skb;
1798 
1799 		unix_state_lock(sk);
1800 		skb = skb_dequeue(&sk->sk_receive_queue);
1801 		if (skb == NULL) {
1802 			if (copied >= target)
1803 				goto unlock;
1804 
1805 			/*
1806 			 *	POSIX 1003.1g mandates this order.
1807 			 */
1808 
1809 			err = sock_error(sk);
1810 			if (err)
1811 				goto unlock;
1812 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1813 				goto unlock;
1814 
1815 			unix_state_unlock(sk);
1816 			err = -EAGAIN;
			/* Zero timeout: leave the loop with readlock still held. */
1817 			if (!timeo)
1818 				break;
1819 			mutex_unlock(&u->readlock);
1820 
1821 			timeo = unix_stream_data_wait(sk, timeo);
1822 
			/*
			 * readlock was released above, so this exit goes
			 * straight to "out", past both the mutex_unlock()
			 * and the scm_recv() that follow the loop.
			 */
1823 			if (signal_pending(current)) {
1824 				err = sock_intr_errno(timeo);
1825 				goto out;
1826 			}
1827 			mutex_lock(&u->readlock);
1828 			continue;
1829  unlock:
1830 			unix_state_unlock(sk);
1831 			break;
1832 		}
1833 		unix_state_unlock(sk);
1834 
1835 		if (check_creds) {
1836 			/* Never glue messages from different writers */
1837 			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1838 				   sizeof(siocb->scm->creds)) != 0) {
1839 				skb_queue_head(&sk->sk_receive_queue, skb);
1840 				break;
1841 			}
1842 		} else {
1843 			/* Copy credentials */
1844 			siocb->scm->creds = *UNIXCREDS(skb);
1845 			check_creds = 1;
1846 		}
1847 
1848 		/* Copy address just once */
1849 		if (sunaddr) {
1850 			unix_copy_addr(msg, skb->sk);
1851 			sunaddr = NULL;
1852 		}
1853 
1854 		chunk = min_t(unsigned int, skb->len, size);
		/* On a failed copy the skb goes back untouched. */
1855 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1856 			skb_queue_head(&sk->sk_receive_queue, skb);
1857 			if (copied == 0)
1858 				copied = -EFAULT;
1859 			break;
1860 		}
1861 		copied += chunk;
1862 		size -= chunk;
1863 
1864 		/* Mark read part of skb as used */
1865 		if (!(flags & MSG_PEEK)) {
1866 			skb_pull(skb, chunk);
1867 
1868 			if (UNIXCB(skb).fp)
1869 				unix_detach_fds(siocb->scm, skb);
1870 
1871 			/* put the skb back if we didn't use it up.. */
1872 			if (skb->len) {
1873 				skb_queue_head(&sk->sk_receive_queue, skb);
1874 				break;
1875 			}
1876 
1877 			kfree_skb(skb);
1878 
			/* Stop once fds were collected from an skb. */
1879 			if (siocb->scm->fp)
1880 				break;
1881 		} else {
1882 			/* It is questionable, see note in unix_dgram_recvmsg.
1883 			 */
1884 			if (UNIXCB(skb).fp)
1885 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1886 
1887 			/* put message back and return */
1888 			skb_queue_head(&sk->sk_receive_queue, skb);
1889 			break;
1890 		}
1891 	} while (size);
1892 
1893 	mutex_unlock(&u->readlock);
1894 	scm_recv(sock, msg, siocb->scm, flags);
1895 out:
1896 	return copied ? : err;
1897 }
1898 
1899 static int unix_shutdown(struct socket *sock, int mode)
1900 {
1901 	struct sock *sk = sock->sk;
1902 	struct sock *other;
1903 
1904 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1905 
1906 	if (mode) {
1907 		unix_state_lock(sk);
1908 		sk->sk_shutdown |= mode;
1909 		other = unix_peer(sk);
1910 		if (other)
1911 			sock_hold(other);
1912 		unix_state_unlock(sk);
1913 		sk->sk_state_change(sk);
1914 
1915 		if (other &&
1916 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1917 
1918 			int peer_mode = 0;
1919 
1920 			if (mode&RCV_SHUTDOWN)
1921 				peer_mode |= SEND_SHUTDOWN;
1922 			if (mode&SEND_SHUTDOWN)
1923 				peer_mode |= RCV_SHUTDOWN;
1924 			unix_state_lock(other);
1925 			other->sk_shutdown |= peer_mode;
1926 			unix_state_unlock(other);
1927 			other->sk_state_change(other);
1928 			read_lock(&other->sk_callback_lock);
1929 			if (peer_mode == SHUTDOWN_MASK)
1930 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1931 			else if (peer_mode & RCV_SHUTDOWN)
1932 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1933 			read_unlock(&other->sk_callback_lock);
1934 		}
1935 		if (other)
1936 			sock_put(other);
1937 	}
1938 	return 0;
1939 }
1940 
1941 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1942 {
1943 	struct sock *sk = sock->sk;
1944 	long amount = 0;
1945 	int err;
1946 
1947 	switch (cmd) {
1948 	case SIOCOUTQ:
1949 		amount = atomic_read(&sk->sk_wmem_alloc);
1950 		err = put_user(amount, (int __user *)arg);
1951 		break;
1952 	case SIOCINQ:
1953 		{
1954 			struct sk_buff *skb;
1955 
1956 			if (sk->sk_state == TCP_LISTEN) {
1957 				err = -EINVAL;
1958 				break;
1959 			}
1960 
1961 			spin_lock(&sk->sk_receive_queue.lock);
1962 			if (sk->sk_type == SOCK_STREAM ||
1963 			    sk->sk_type == SOCK_SEQPACKET) {
1964 				skb_queue_walk(&sk->sk_receive_queue, skb)
1965 					amount += skb->len;
1966 			} else {
1967 				skb = skb_peek(&sk->sk_receive_queue);
1968 				if (skb)
1969 					amount = skb->len;
1970 			}
1971 			spin_unlock(&sk->sk_receive_queue.lock);
1972 			err = put_user(amount, (int __user *)arg);
1973 			break;
1974 		}
1975 
1976 	default:
1977 		err = -ENOIOCTLCMD;
1978 		break;
1979 	}
1980 	return err;
1981 }
1982 
1983 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1984 {
1985 	struct sock *sk = sock->sk;
1986 	unsigned int mask;
1987 
1988 	poll_wait(file, sk->sk_sleep, wait);
1989 	mask = 0;
1990 
1991 	/* exceptional events? */
1992 	if (sk->sk_err)
1993 		mask |= POLLERR;
1994 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1995 		mask |= POLLHUP;
1996 	if (sk->sk_shutdown & RCV_SHUTDOWN)
1997 		mask |= POLLRDHUP;
1998 
1999 	/* readable? */
2000 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2001 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2002 		mask |= POLLIN | POLLRDNORM;
2003 
2004 	/* Connection-based need to check for termination and startup */
2005 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2006 	    sk->sk_state == TCP_CLOSE)
2007 		mask |= POLLHUP;
2008 
2009 	/*
2010 	 * we set writable also when the other side has shut down the
2011 	 * connection. This prevents stuck sockets.
2012 	 */
2013 	if (unix_writable(sk))
2014 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2015 
2016 	return mask;
2017 }
2018 
2019 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2020 				    poll_table *wait)
2021 {
2022 	struct sock *sk = sock->sk, *other;
2023 	unsigned int mask, writable;
2024 
2025 	poll_wait(file, sk->sk_sleep, wait);
2026 	mask = 0;
2027 
2028 	/* exceptional events? */
2029 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2030 		mask |= POLLERR;
2031 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2032 		mask |= POLLRDHUP;
2033 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2034 		mask |= POLLHUP;
2035 
2036 	/* readable? */
2037 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2038 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2039 		mask |= POLLIN | POLLRDNORM;
2040 
2041 	/* Connection-based need to check for termination and startup */
2042 	if (sk->sk_type == SOCK_SEQPACKET) {
2043 		if (sk->sk_state == TCP_CLOSE)
2044 			mask |= POLLHUP;
2045 		/* connection hasn't started yet? */
2046 		if (sk->sk_state == TCP_SYN_SENT)
2047 			return mask;
2048 	}
2049 
2050 	/* writable? */
2051 	writable = unix_writable(sk);
2052 	if (writable) {
2053 		other = unix_peer_get(sk);
2054 		if (other) {
2055 			if (unix_peer(other) != sk) {
2056 				poll_wait(file, &unix_sk(other)->peer_wait,
2057 					  wait);
2058 				if (unix_recvq_full(other))
2059 					writable = 0;
2060 			}
2061 
2062 			sock_put(other);
2063 		}
2064 	}
2065 
2066 	if (writable)
2067 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2068 	else
2069 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2070 
2071 	return mask;
2072 }
2073 
2074 #ifdef CONFIG_PROC_FS
2075 static struct sock *first_unix_socket(int *i)
2076 {
2077 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2078 		if (!hlist_empty(&unix_socket_table[*i]))
2079 			return __sk_head(&unix_socket_table[*i]);
2080 	}
2081 	return NULL;
2082 }
2083 
2084 static struct sock *next_unix_socket(int *i, struct sock *s)
2085 {
2086 	struct sock *next = sk_next(s);
2087 	/* More in this chain? */
2088 	if (next)
2089 		return next;
2090 	/* Look for next non-empty chain. */
2091 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2092 		if (!hlist_empty(&unix_socket_table[*i]))
2093 			return __sk_head(&unix_socket_table[*i]);
2094 	}
2095 	return NULL;
2096 }
2097 
/*
 * Private data of the seq_file iteration; its size is what unix_seq_open()
 * hands to seq_open_net().
 */
2098 struct unix_iter_state {
2099 	struct seq_net_private p;
	/* unix_socket_table chain index, maintained by first/next_unix_socket() */
2100 	int i;
2101 };
2102 
2103 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2104 {
2105 	struct unix_iter_state *iter = seq->private;
2106 	loff_t off = 0;
2107 	struct sock *s;
2108 
2109 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2110 		if (sock_net(s) != seq_file_net(seq))
2111 			continue;
2112 		if (off == pos)
2113 			return s;
2114 		++off;
2115 	}
2116 	return NULL;
2117 }
2118 
2119 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2120 	__acquires(unix_table_lock)
2121 {
2122 	spin_lock(&unix_table_lock);
2123 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2124 }
2125 
2126 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2127 {
2128 	struct unix_iter_state *iter = seq->private;
2129 	struct sock *sk = v;
2130 	++*pos;
2131 
2132 	if (v == SEQ_START_TOKEN)
2133 		sk = first_unix_socket(&iter->i);
2134 	else
2135 		sk = next_unix_socket(&iter->i, sk);
2136 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2137 		sk = next_unix_socket(&iter->i, sk);
2138 	return sk;
2139 }
2140 
/* seq_file ->stop: drops the unix_table_lock taken in unix_seq_start(). */
2141 static void unix_seq_stop(struct seq_file *seq, void *v)
2142 	__releases(unix_table_lock)
2143 {
2144 	spin_unlock(&unix_table_lock);
2145 }
2146 
2147 static int unix_seq_show(struct seq_file *seq, void *v)
2148 {
2149 
2150 	if (v == SEQ_START_TOKEN)
2151 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2152 			 "Inode Path\n");
2153 	else {
2154 		struct sock *s = v;
2155 		struct unix_sock *u = unix_sk(s);
2156 		unix_state_lock(s);
2157 
2158 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2159 			s,
2160 			atomic_read(&s->sk_refcnt),
2161 			0,
2162 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2163 			s->sk_type,
2164 			s->sk_socket ?
2165 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2166 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2167 			sock_i_ino(s));
2168 
2169 		if (u->addr) {
2170 			int i, len;
2171 			seq_putc(seq, ' ');
2172 
2173 			i = 0;
2174 			len = u->addr->len - sizeof(short);
2175 			if (!UNIX_ABSTRACT(s))
2176 				len--;
2177 			else {
2178 				seq_putc(seq, '@');
2179 				i++;
2180 			}
2181 			for ( ; i < len; i++)
2182 				seq_putc(seq, u->addr->name->sun_path[i]);
2183 		}
2184 		unix_state_unlock(s);
2185 		seq_putc(seq, '\n');
2186 	}
2187 
2188 	return 0;
2189 }
2190 
/* seq_file iterator callbacks; passed to seq_open_net() by unix_seq_open(). */
2191 static const struct seq_operations unix_seq_ops = {
2192 	.start  = unix_seq_start,
2193 	.next   = unix_seq_next,
2194 	.stop   = unix_seq_stop,
2195 	.show   = unix_seq_show,
2196 };
2197 
/*
 * ->open of the proc file: sets up the seq_file through seq_open_net() with
 * unix_seq_ops and private data sized for struct unix_iter_state.
 * Returns whatever seq_open_net() returns.
 */
2198 static int unix_seq_open(struct inode *inode, struct file *file)
2199 {
2200 	return seq_open_net(inode, file, &unix_seq_ops,
2201 			    sizeof(struct unix_iter_state));
2202 }
2203 
/*
 * File operations of the "unix" proc entry created in unix_net_init();
 * reading and seeking are delegated to the generic seq_file helpers.
 */
2204 static const struct file_operations unix_seq_fops = {
2205 	.owner		= THIS_MODULE,
2206 	.open		= unix_seq_open,
2207 	.read		= seq_read,
2208 	.llseek		= seq_lseek,
2209 	.release	= seq_release_net,
2210 };
2211 
2212 #endif
2213 
/*
 * PF_UNIX protocol family descriptor, registered with sock_register() in
 * af_unix_init(); names unix_create as the family's create hook.
 */
2214 static struct net_proto_family unix_family_ops = {
2215 	.family = PF_UNIX,
2216 	.create = unix_create,
2217 	.owner	= THIS_MODULE,
2218 };
2219 
2220 
2221 static int unix_net_init(struct net *net)
2222 {
2223 	int error = -ENOMEM;
2224 
2225 	net->unx.sysctl_max_dgram_qlen = 10;
2226 	if (unix_sysctl_register(net))
2227 		goto out;
2228 
2229 #ifdef CONFIG_PROC_FS
2230 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2231 		unix_sysctl_unregister(net);
2232 		goto out;
2233 	}
2234 #endif
2235 	error = 0;
2236 out:
2237 	return error;
2238 }
2239 
/*
 * Per-namespace teardown: unregisters the sysctl table and removes the
 * "unix" proc entry.
 */
2240 static void unix_net_exit(struct net *net)
2241 {
2242 	unix_sysctl_unregister(net);
2243 	proc_net_remove(net, "unix");
2244 }
2245 
/*
 * Per-namespace init/exit hooks, registered with register_pernet_subsys()
 * in af_unix_init().
 */
2246 static struct pernet_operations unix_net_ops = {
2247 	.init = unix_net_init,
2248 	.exit = unix_net_exit,
2249 };
2250 
2251 static int __init af_unix_init(void)
2252 {
2253 	int rc = -1;
2254 	struct sk_buff *dummy_skb;
2255 
2256 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2257 
2258 	rc = proto_register(&unix_proto, 1);
2259 	if (rc != 0) {
2260 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2261 		       __func__);
2262 		goto out;
2263 	}
2264 
2265 	sock_register(&unix_family_ops);
2266 	register_pernet_subsys(&unix_net_ops);
2267 out:
2268 	return rc;
2269 }
2270 
/*
 * Module unload: unregisters the PF_UNIX family, unix_proto and the
 * per-namespace operations that af_unix_init() registered.
 */
2271 static void __exit af_unix_exit(void)
2272 {
2273 	sock_unregister(PF_UNIX);
2274 	proto_unregister(&unix_proto);
2275 	unregister_pernet_subsys(&unix_net_ops);
2276 }
2277 
2278 /* Earlier than device_initcall() so that other drivers invoking
2279    request_module() don't end up in a loop when modprobe tries
2280    to use a UNIX socket. But later than subsys_initcall() because
2281    we depend on stuff initialised there */
2282 fs_initcall(af_unix_init);
/* af_unix_exit() is the matching unload hook. */
2283 module_exit(af_unix_exit);
2284 
2285 MODULE_LICENSE("GPL");
2286 MODULE_ALIAS_NETPROTO(PF_UNIX);
2287