xref: /openbmc/linux/net/unix/af_unix.c (revision e190bfe5)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid a huge number
38  *					of socks being hashed (this is for
39  *					unix_gc() performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 static DEFINE_SPINLOCK(unix_table_lock);
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121 
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
/*
 * Security ID passing between an scm cookie and an skb.
 *
 * With CONFIG_SECURITY_NETWORK the secid is copied to/from the slot that
 * UNIXSID(skb) designates; without it both helpers compile to nothing.
 * All four variants are static inline so that both configurations give
 * the helpers the same linkage and inlining behaviour.
 */
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the secid held in @scm into the skb's UNIXSID slot. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the secid stored in the skb's UNIXSID slot back into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate spin lock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len] = 0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220 	return len;
221 }
222 
/*
 * Unlink @sk from whatever hash chain it is on.  Does no locking itself;
 * callers in this file invoke it with unix_table_lock held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	(void)sk_del_node_init(sk);
}
227 
/*
 * Link @sk onto the chain @list.  Warns if @sk is still hashed somewhere.
 * Does no locking itself; callers in this file hold unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	int still_hashed = !sk_unhashed(sk);

	WARN_ON(still_hashed);
	sk_add_node(sk, list);
}
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
314 static void unix_write_space(struct sock *sk)
315 {
316 	struct socket_wq *wq;
317 
318 	rcu_read_lock();
319 	if (unix_writable(sk)) {
320 		wq = rcu_dereference(sk->sk_wq);
321 		if (wq_has_sleeper(wq))
322 			wake_up_interruptible_sync(&wq->wait);
323 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
324 	}
325 	rcu_read_unlock();
326 }
327 
328 /* When dgram socket disconnects (or changes its peer), we clear its receive
329  * queue of packets arrived from previous peer. First, it allows to do
330  * flow control based only on wmem_alloc; second, sk connected to peer
331  * may receive messages only from that peer. */
332 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
333 {
334 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
335 		skb_queue_purge(&sk->sk_receive_queue);
336 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
337 
338 		/* If one link of bidirectional dgram pipe is disconnected,
339 		 * we signal error. Messages are lost. Do not make this,
340 		 * when peer was not connected to us.
341 		 */
342 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
343 			other->sk_err = ECONNRESET;
344 			other->sk_error_report(other);
345 		}
346 	}
347 }
348 
349 static void unix_sock_destructor(struct sock *sk)
350 {
351 	struct unix_sock *u = unix_sk(sk);
352 
353 	skb_queue_purge(&sk->sk_receive_queue);
354 
355 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
356 	WARN_ON(!sk_unhashed(sk));
357 	WARN_ON(sk->sk_socket);
358 	if (!sock_flag(sk, SOCK_DEAD)) {
359 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
360 		return;
361 	}
362 
363 	if (u->addr)
364 		unix_release_addr(u->addr);
365 
366 	atomic_dec(&unix_nr_socks);
367 	local_bh_disable();
368 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
369 	local_bh_enable();
370 #ifdef UNIX_REFCNT_DEBUG
371 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
372 		atomic_read(&unix_nr_socks));
373 #endif
374 }
375 
376 static int unix_release_sock(struct sock *sk, int embrion)
377 {
378 	struct unix_sock *u = unix_sk(sk);
379 	struct dentry *dentry;
380 	struct vfsmount *mnt;
381 	struct sock *skpair;
382 	struct sk_buff *skb;
383 	int state;
384 
385 	unix_remove_socket(sk);
386 
387 	/* Clear state */
388 	unix_state_lock(sk);
389 	sock_orphan(sk);
390 	sk->sk_shutdown = SHUTDOWN_MASK;
391 	dentry	     = u->dentry;
392 	u->dentry    = NULL;
393 	mnt	     = u->mnt;
394 	u->mnt	     = NULL;
395 	state = sk->sk_state;
396 	sk->sk_state = TCP_CLOSE;
397 	unix_state_unlock(sk);
398 
399 	wake_up_interruptible_all(&u->peer_wait);
400 
401 	skpair = unix_peer(sk);
402 
403 	if (skpair != NULL) {
404 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
405 			unix_state_lock(skpair);
406 			/* No more writes */
407 			skpair->sk_shutdown = SHUTDOWN_MASK;
408 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
409 				skpair->sk_err = ECONNRESET;
410 			unix_state_unlock(skpair);
411 			skpair->sk_state_change(skpair);
412 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
413 		}
414 		sock_put(skpair); /* It may now die */
415 		unix_peer(sk) = NULL;
416 	}
417 
418 	/* Try to flush out this socket. Throw out buffers at least */
419 
420 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
421 		if (state == TCP_LISTEN)
422 			unix_release_sock(skb->sk, 1);
423 		/* passed fds are erased in the kfree_skb hook	      */
424 		kfree_skb(skb);
425 	}
426 
427 	if (dentry) {
428 		dput(dentry);
429 		mntput(mnt);
430 	}
431 
432 	sock_put(sk);
433 
434 	/* ---- Socket is dead now and most probably destroyed ---- */
435 
436 	/*
437 	 * Fixme: BSD difference: In BSD all sockets connected to use get
438 	 *	  ECONNRESET and we die on the spot. In Linux we behave
439 	 *	  like files and pipes do and wait for the last
440 	 *	  dereference.
441 	 *
442 	 * Can't we simply set sock->err?
443 	 *
444 	 *	  What the above comment does talk about? --ANK(980817)
445 	 */
446 
447 	if (unix_tot_inflight)
448 		unix_gc();		/* Garbage collect fds */
449 
450 	return 0;
451 }
452 
453 static int unix_listen(struct socket *sock, int backlog)
454 {
455 	int err;
456 	struct sock *sk = sock->sk;
457 	struct unix_sock *u = unix_sk(sk);
458 
459 	err = -EOPNOTSUPP;
460 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
461 		goto out;	/* Only stream/seqpacket sockets accept */
462 	err = -EINVAL;
463 	if (!u->addr)
464 		goto out;	/* No listens on an unbound socket */
465 	unix_state_lock(sk);
466 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
467 		goto out_unlock;
468 	if (backlog > sk->sk_max_ack_backlog)
469 		wake_up_interruptible_all(&u->peer_wait);
470 	sk->sk_max_ack_backlog	= backlog;
471 	sk->sk_state		= TCP_LISTEN;
472 	/* set credentials so connect can copy them */
473 	sk->sk_peercred.pid	= task_tgid_vnr(current);
474 	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
475 	err = 0;
476 
477 out_unlock:
478 	unix_state_unlock(sk);
479 out:
480 	return err;
481 }
482 
483 static int unix_release(struct socket *);
484 static int unix_bind(struct socket *, struct sockaddr *, int);
485 static int unix_stream_connect(struct socket *, struct sockaddr *,
486 			       int addr_len, int flags);
487 static int unix_socketpair(struct socket *, struct socket *);
488 static int unix_accept(struct socket *, struct socket *, int);
489 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
490 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
491 static unsigned int unix_dgram_poll(struct file *, struct socket *,
492 				    poll_table *);
493 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
494 static int unix_shutdown(struct socket *, int);
495 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
496 			       struct msghdr *, size_t);
497 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
498 			       struct msghdr *, size_t, int);
499 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
500 			      struct msghdr *, size_t);
501 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
502 			      struct msghdr *, size_t, int);
503 static int unix_dgram_connect(struct socket *, struct sockaddr *,
504 			      int, int);
505 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
506 				  struct msghdr *, size_t);
507 
508 static const struct proto_ops unix_stream_ops = {
509 	.family =	PF_UNIX,
510 	.owner =	THIS_MODULE,
511 	.release =	unix_release,
512 	.bind =		unix_bind,
513 	.connect =	unix_stream_connect,
514 	.socketpair =	unix_socketpair,
515 	.accept =	unix_accept,
516 	.getname =	unix_getname,
517 	.poll =		unix_poll,
518 	.ioctl =	unix_ioctl,
519 	.listen =	unix_listen,
520 	.shutdown =	unix_shutdown,
521 	.setsockopt =	sock_no_setsockopt,
522 	.getsockopt =	sock_no_getsockopt,
523 	.sendmsg =	unix_stream_sendmsg,
524 	.recvmsg =	unix_stream_recvmsg,
525 	.mmap =		sock_no_mmap,
526 	.sendpage =	sock_no_sendpage,
527 };
528 
529 static const struct proto_ops unix_dgram_ops = {
530 	.family =	PF_UNIX,
531 	.owner =	THIS_MODULE,
532 	.release =	unix_release,
533 	.bind =		unix_bind,
534 	.connect =	unix_dgram_connect,
535 	.socketpair =	unix_socketpair,
536 	.accept =	sock_no_accept,
537 	.getname =	unix_getname,
538 	.poll =		unix_dgram_poll,
539 	.ioctl =	unix_ioctl,
540 	.listen =	sock_no_listen,
541 	.shutdown =	unix_shutdown,
542 	.setsockopt =	sock_no_setsockopt,
543 	.getsockopt =	sock_no_getsockopt,
544 	.sendmsg =	unix_dgram_sendmsg,
545 	.recvmsg =	unix_dgram_recvmsg,
546 	.mmap =		sock_no_mmap,
547 	.sendpage =	sock_no_sendpage,
548 };
549 
550 static const struct proto_ops unix_seqpacket_ops = {
551 	.family =	PF_UNIX,
552 	.owner =	THIS_MODULE,
553 	.release =	unix_release,
554 	.bind =		unix_bind,
555 	.connect =	unix_stream_connect,
556 	.socketpair =	unix_socketpair,
557 	.accept =	unix_accept,
558 	.getname =	unix_getname,
559 	.poll =		unix_dgram_poll,
560 	.ioctl =	unix_ioctl,
561 	.listen =	unix_listen,
562 	.shutdown =	unix_shutdown,
563 	.setsockopt =	sock_no_setsockopt,
564 	.getsockopt =	sock_no_getsockopt,
565 	.sendmsg =	unix_seqpacket_sendmsg,
566 	.recvmsg =	unix_dgram_recvmsg,
567 	.mmap =		sock_no_mmap,
568 	.sendpage =	sock_no_sendpage,
569 };
570 
571 static struct proto unix_proto = {
572 	.name			= "UNIX",
573 	.owner			= THIS_MODULE,
574 	.obj_size		= sizeof(struct unix_sock),
575 };
576 
577 /*
578  * AF_UNIX sockets do not interact with hardware, hence they
579  * dont trigger interrupts - so it's safe for them to have
580  * bh-unsafe locking for their sk_receive_queue.lock. Split off
581  * this special lock-class by reinitializing the spinlock key:
582  */
583 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
584 
585 static struct sock *unix_create1(struct net *net, struct socket *sock)
586 {
587 	struct sock *sk = NULL;
588 	struct unix_sock *u;
589 
590 	atomic_inc(&unix_nr_socks);
591 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
592 		goto out;
593 
594 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
595 	if (!sk)
596 		goto out;
597 
598 	sock_init_data(sock, sk);
599 	lockdep_set_class(&sk->sk_receive_queue.lock,
600 				&af_unix_sk_receive_queue_lock_key);
601 
602 	sk->sk_write_space	= unix_write_space;
603 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
604 	sk->sk_destruct		= unix_sock_destructor;
605 	u	  = unix_sk(sk);
606 	u->dentry = NULL;
607 	u->mnt	  = NULL;
608 	spin_lock_init(&u->lock);
609 	atomic_long_set(&u->inflight, 0);
610 	INIT_LIST_HEAD(&u->link);
611 	mutex_init(&u->readlock); /* single task reading lock */
612 	init_waitqueue_head(&u->peer_wait);
613 	unix_insert_socket(unix_sockets_unbound, sk);
614 out:
615 	if (sk == NULL)
616 		atomic_dec(&unix_nr_socks);
617 	else {
618 		local_bh_disable();
619 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
620 		local_bh_enable();
621 	}
622 	return sk;
623 }
624 
625 static int unix_create(struct net *net, struct socket *sock, int protocol,
626 		       int kern)
627 {
628 	if (protocol && protocol != PF_UNIX)
629 		return -EPROTONOSUPPORT;
630 
631 	sock->state = SS_UNCONNECTED;
632 
633 	switch (sock->type) {
634 	case SOCK_STREAM:
635 		sock->ops = &unix_stream_ops;
636 		break;
637 		/*
638 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
639 		 *	nothing uses it.
640 		 */
641 	case SOCK_RAW:
642 		sock->type = SOCK_DGRAM;
643 	case SOCK_DGRAM:
644 		sock->ops = &unix_dgram_ops;
645 		break;
646 	case SOCK_SEQPACKET:
647 		sock->ops = &unix_seqpacket_ops;
648 		break;
649 	default:
650 		return -ESOCKTNOSUPPORT;
651 	}
652 
653 	return unix_create1(net, sock) ? 0 : -ENOMEM;
654 }
655 
656 static int unix_release(struct socket *sock)
657 {
658 	struct sock *sk = sock->sk;
659 
660 	if (!sk)
661 		return 0;
662 
663 	sock->sk = NULL;
664 
665 	return unix_release_sock(sk, 0);
666 }
667 
668 static int unix_autobind(struct socket *sock)
669 {
670 	struct sock *sk = sock->sk;
671 	struct net *net = sock_net(sk);
672 	struct unix_sock *u = unix_sk(sk);
673 	static u32 ordernum = 1;
674 	struct unix_address *addr;
675 	int err;
676 
677 	mutex_lock(&u->readlock);
678 
679 	err = 0;
680 	if (u->addr)
681 		goto out;
682 
683 	err = -ENOMEM;
684 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
685 	if (!addr)
686 		goto out;
687 
688 	addr->name->sun_family = AF_UNIX;
689 	atomic_set(&addr->refcnt, 1);
690 
691 retry:
692 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
693 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
694 
695 	spin_lock(&unix_table_lock);
696 	ordernum = (ordernum+1)&0xFFFFF;
697 
698 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
699 				      addr->hash)) {
700 		spin_unlock(&unix_table_lock);
701 		/* Sanity yield. It is unusual case, but yet... */
702 		if (!(ordernum&0xFF))
703 			yield();
704 		goto retry;
705 	}
706 	addr->hash ^= sk->sk_type;
707 
708 	__unix_remove_socket(sk);
709 	u->addr = addr;
710 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
711 	spin_unlock(&unix_table_lock);
712 	err = 0;
713 
714 out:	mutex_unlock(&u->readlock);
715 	return err;
716 }
717 
/*
 * Resolve a destination address to a socket of type @type in @net.
 *
 * Abstract names (first path byte zero) are looked up by name.
 * Filesystem names are resolved with kern_path(), must be writable by
 * the caller and must be a socket inode; the socket is then found by
 * inode.
 *
 * Returns the socket with a reference held.  On failure returns NULL
 * and stores a negative errno in *error: the kern_path() or
 * inode_permission() result, -ECONNREFUSED when there is no such
 * socket, or -EPROTOTYPE when a filesystem socket has the wrong type.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct dentry *dentry;
	struct inode *inode;
	struct sock *found;
	struct path path;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name. */
		err = -ECONNREFUSED;
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found)
			goto fail;

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	/* Filesystem name. */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(net, inode);
	if (!found)
		goto put_fail;

	/* Only a successful lookup updates the access time. */
	if (found->sk_type == type)
		touch_atime(path.mnt, path.dentry);

	path_put(&path);

	err = -EPROTOTYPE;
	if (found->sk_type != type) {
		sock_put(found);
		goto fail;
	}
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
772 
773 
774 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
775 {
776 	struct sock *sk = sock->sk;
777 	struct net *net = sock_net(sk);
778 	struct unix_sock *u = unix_sk(sk);
779 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
780 	struct dentry *dentry = NULL;
781 	struct nameidata nd;
782 	int err;
783 	unsigned hash;
784 	struct unix_address *addr;
785 	struct hlist_head *list;
786 
787 	err = -EINVAL;
788 	if (sunaddr->sun_family != AF_UNIX)
789 		goto out;
790 
791 	if (addr_len == sizeof(short)) {
792 		err = unix_autobind(sock);
793 		goto out;
794 	}
795 
796 	err = unix_mkname(sunaddr, addr_len, &hash);
797 	if (err < 0)
798 		goto out;
799 	addr_len = err;
800 
801 	mutex_lock(&u->readlock);
802 
803 	err = -EINVAL;
804 	if (u->addr)
805 		goto out_up;
806 
807 	err = -ENOMEM;
808 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
809 	if (!addr)
810 		goto out_up;
811 
812 	memcpy(addr->name, sunaddr, addr_len);
813 	addr->len = addr_len;
814 	addr->hash = hash ^ sk->sk_type;
815 	atomic_set(&addr->refcnt, 1);
816 
817 	if (sunaddr->sun_path[0]) {
818 		unsigned int mode;
819 		err = 0;
820 		/*
821 		 * Get the parent directory, calculate the hash for last
822 		 * component.
823 		 */
824 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
825 		if (err)
826 			goto out_mknod_parent;
827 
828 		dentry = lookup_create(&nd, 0);
829 		err = PTR_ERR(dentry);
830 		if (IS_ERR(dentry))
831 			goto out_mknod_unlock;
832 
833 		/*
834 		 * All right, let's create it.
835 		 */
836 		mode = S_IFSOCK |
837 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
838 		err = mnt_want_write(nd.path.mnt);
839 		if (err)
840 			goto out_mknod_dput;
841 		err = security_path_mknod(&nd.path, dentry, mode, 0);
842 		if (err)
843 			goto out_mknod_drop_write;
844 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
845 out_mknod_drop_write:
846 		mnt_drop_write(nd.path.mnt);
847 		if (err)
848 			goto out_mknod_dput;
849 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
850 		dput(nd.path.dentry);
851 		nd.path.dentry = dentry;
852 
853 		addr->hash = UNIX_HASH_SIZE;
854 	}
855 
856 	spin_lock(&unix_table_lock);
857 
858 	if (!sunaddr->sun_path[0]) {
859 		err = -EADDRINUSE;
860 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
861 					      sk->sk_type, hash)) {
862 			unix_release_addr(addr);
863 			goto out_unlock;
864 		}
865 
866 		list = &unix_socket_table[addr->hash];
867 	} else {
868 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
869 		u->dentry = nd.path.dentry;
870 		u->mnt    = nd.path.mnt;
871 	}
872 
873 	err = 0;
874 	__unix_remove_socket(sk);
875 	u->addr = addr;
876 	__unix_insert_socket(list, sk);
877 
878 out_unlock:
879 	spin_unlock(&unix_table_lock);
880 out_up:
881 	mutex_unlock(&u->readlock);
882 out:
883 	return err;
884 
885 out_mknod_dput:
886 	dput(dentry);
887 out_mknod_unlock:
888 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
889 	path_put(&nd.path);
890 out_mknod_parent:
891 	if (err == -EEXIST)
892 		err = -EADDRINUSE;
893 	unix_release_addr(addr);
894 	goto out_up;
895 }
896 
/*
 * Take the state locks of @sk1 and @sk2.  When @sk2 is NULL or the same
 * socket as @sk1, only @sk1 is locked.  Otherwise the socket at the
 * lower address is locked first and the other with the nested variant,
 * so the order is the same whichever way round the arguments are
 * passed.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
911 
/*
 * Counterpart of unix_state_double_lock(): release @sk1 only when @sk2
 * is NULL or the same socket, otherwise release both.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	int single = unlikely(sk1 == sk2) || !sk2;

	unix_state_unlock(sk1);
	if (!single)
		unix_state_unlock(sk2);
}
921 
922 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
923 			      int alen, int flags)
924 {
925 	struct sock *sk = sock->sk;
926 	struct net *net = sock_net(sk);
927 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
928 	struct sock *other;
929 	unsigned hash;
930 	int err;
931 
932 	if (addr->sa_family != AF_UNSPEC) {
933 		err = unix_mkname(sunaddr, alen, &hash);
934 		if (err < 0)
935 			goto out;
936 		alen = err;
937 
938 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
939 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
940 			goto out;
941 
942 restart:
943 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
944 		if (!other)
945 			goto out;
946 
947 		unix_state_double_lock(sk, other);
948 
949 		/* Apparently VFS overslept socket death. Retry. */
950 		if (sock_flag(other, SOCK_DEAD)) {
951 			unix_state_double_unlock(sk, other);
952 			sock_put(other);
953 			goto restart;
954 		}
955 
956 		err = -EPERM;
957 		if (!unix_may_send(sk, other))
958 			goto out_unlock;
959 
960 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
961 		if (err)
962 			goto out_unlock;
963 
964 	} else {
965 		/*
966 		 *	1003.1g breaking connected state with AF_UNSPEC
967 		 */
968 		other = NULL;
969 		unix_state_double_lock(sk, other);
970 	}
971 
972 	/*
973 	 * If it was connected, reconnect.
974 	 */
975 	if (unix_peer(sk)) {
976 		struct sock *old_peer = unix_peer(sk);
977 		unix_peer(sk) = other;
978 		unix_state_double_unlock(sk, other);
979 
980 		if (other != old_peer)
981 			unix_dgram_disconnected(sk, old_peer);
982 		sock_put(old_peer);
983 	} else {
984 		unix_peer(sk) = other;
985 		unix_state_double_unlock(sk, other);
986 	}
987 	return 0;
988 
989 out_unlock:
990 	unix_state_double_unlock(sk, other);
991 	sock_put(other);
992 out:
993 	return err;
994 }
995 
996 static long unix_wait_for_peer(struct sock *other, long timeo)
997 {
998 	struct unix_sock *u = unix_sk(other);
999 	int sched;
1000 	DEFINE_WAIT(wait);
1001 
1002 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1003 
1004 	sched = !sock_flag(other, SOCK_DEAD) &&
1005 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1006 		unix_recvq_full(other);
1007 
1008 	unix_state_unlock(other);
1009 
1010 	if (sched)
1011 		timeo = schedule_timeout(timeo);
1012 
1013 	finish_wait(&u->peer_wait, &wait);
1014 	return timeo;
1015 }
1016 
/*
 *	Connect sk to a socket that is in TCP_LISTEN state.
 *
 *	@sock:     socket being connected
 *	@uaddr:    address of the listener, used as a struct sockaddr_un
 *	@addr_len: length of @uaddr; replaced by the value unix_mkname() returns
 *	@flags:    only O_NONBLOCK is examined, to pick the send timeout
 *
 *	The sock that becomes the far end of the connection (newsk) and a
 *	one-byte skb allocated against it are obtained before any state lock
 *	is taken.  With the listener's state lock held, and then our own,
 *	sk and newsk are linked as peers and the skb is queued on the
 *	listener's receive queue; unix_accept() reads skb->sk to find the
 *	new sock (presumably set up by sock_wmalloc() - confirm).
 *
 *	Returns 0 on success, otherwise a negative errno: the error reported
 *	by unix_mkname(), unix_autobind(), unix_find_other() or
 *	security_unix_stream_connect(), or -ENOMEM, -ECONNREFUSED, -EAGAIN,
 *	-EISCONN, -EINVAL, or the sock_intr_errno() value when a signal
 *	interrupts the wait for room in the listener's queue.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	/* A non-negative return is the address length used from here on. */
	addr_len = err;

	/* A SOCK_PASSCRED socket without an address is autobound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	/* A zero timeo is treated below as "do not wait". */
	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/*
		 * Neither path after this call unlocks other, so
		 * unix_wait_for_peer() presumably drops the state lock
		 * itself - confirm against its definition.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		/* Drop our reference and look the listener up again. */
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed between the unlocked read and taking the lock. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		/* Release our own lock here; out_unlock releases other's. */
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	/* Reference on sk owned by newsk's peer pointer. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_wq		= &newu->peer_wq;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	/* Reference on newsk owned by sk's peer pointer, set just below. */
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* skb is still NULL when we arrive here before it was allocated. */
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1192 
1193 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1194 {
1195 	struct sock *ska = socka->sk, *skb = sockb->sk;
1196 
1197 	/* Join our sockets back to back */
1198 	sock_hold(ska);
1199 	sock_hold(skb);
1200 	unix_peer(ska) = skb;
1201 	unix_peer(skb) = ska;
1202 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1203 	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1204 	ska->sk_peercred.uid = skb->sk_peercred.uid;
1205 	ska->sk_peercred.gid = skb->sk_peercred.gid;
1206 
1207 	if (ska->sk_type != SOCK_DGRAM) {
1208 		ska->sk_state = TCP_ESTABLISHED;
1209 		skb->sk_state = TCP_ESTABLISHED;
1210 		socka->state  = SS_CONNECTED;
1211 		sockb->state  = SS_CONNECTED;
1212 	}
1213 	return 0;
1214 }
1215 
1216 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1217 {
1218 	struct sock *sk = sock->sk;
1219 	struct sock *tsk;
1220 	struct sk_buff *skb;
1221 	int err;
1222 
1223 	err = -EOPNOTSUPP;
1224 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1225 		goto out;
1226 
1227 	err = -EINVAL;
1228 	if (sk->sk_state != TCP_LISTEN)
1229 		goto out;
1230 
1231 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1232 	 * so that no locks are necessary.
1233 	 */
1234 
1235 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1236 	if (!skb) {
1237 		/* This means receive shutdown. */
1238 		if (err == 0)
1239 			err = -EINVAL;
1240 		goto out;
1241 	}
1242 
1243 	tsk = skb->sk;
1244 	skb_free_datagram(sk, skb);
1245 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1246 
1247 	/* attach accepted sock to socket */
1248 	unix_state_lock(tsk);
1249 	newsock->state = SS_CONNECTED;
1250 	sock_graft(tsk, newsock);
1251 	unix_state_unlock(tsk);
1252 	return 0;
1253 
1254 out:
1255 	return err;
1256 }
1257 
1258 
1259 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1260 {
1261 	struct sock *sk = sock->sk;
1262 	struct unix_sock *u;
1263 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1264 	int err = 0;
1265 
1266 	if (peer) {
1267 		sk = unix_peer_get(sk);
1268 
1269 		err = -ENOTCONN;
1270 		if (!sk)
1271 			goto out;
1272 		err = 0;
1273 	} else {
1274 		sock_hold(sk);
1275 	}
1276 
1277 	u = unix_sk(sk);
1278 	unix_state_lock(sk);
1279 	if (!u->addr) {
1280 		sunaddr->sun_family = AF_UNIX;
1281 		sunaddr->sun_path[0] = 0;
1282 		*uaddr_len = sizeof(short);
1283 	} else {
1284 		struct unix_address *addr = u->addr;
1285 
1286 		*uaddr_len = addr->len;
1287 		memcpy(sunaddr, addr->name, *uaddr_len);
1288 	}
1289 	unix_state_unlock(sk);
1290 	sock_put(sk);
1291 out:
1292 	return err;
1293 }
1294 
1295 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1296 {
1297 	int i;
1298 
1299 	scm->fp = UNIXCB(skb).fp;
1300 	skb->destructor = sock_wfree;
1301 	UNIXCB(skb).fp = NULL;
1302 
1303 	for (i = scm->fp->count-1; i >= 0; i--)
1304 		unix_notinflight(scm->fp->fp[i]);
1305 }
1306 
1307 static void unix_destruct_fds(struct sk_buff *skb)
1308 {
1309 	struct scm_cookie scm;
1310 	memset(&scm, 0, sizeof(scm));
1311 	unix_detach_fds(&scm, skb);
1312 
1313 	/* Alas, it calls VFS */
1314 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1315 	scm_destroy(&scm);
1316 	sock_wfree(skb);
1317 }
1318 
1319 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1320 {
1321 	int i;
1322 
1323 	/*
1324 	 * Need to duplicate file references for the sake of garbage
1325 	 * collection.  Otherwise a socket in the fps might become a
1326 	 * candidate for GC while the skb is not yet queued.
1327 	 */
1328 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1329 	if (!UNIXCB(skb).fp)
1330 		return -ENOMEM;
1331 
1332 	for (i = scm->fp->count-1; i >= 0; i--)
1333 		unix_inflight(scm->fp->fp[i]);
1334 	skb->destructor = unix_destruct_fds;
1335 	return 0;
1336 }
1337 
1338 /*
1339  *	Send AF_UNIX data.
1340  */
1341 
/*
 *	Queue one datagram of @len bytes on the receiver's receive queue.
 *
 *	The receiver is the socket named by msg->msg_name when
 *	msg->msg_namelen is non-zero, otherwise the connected peer.
 *	siocb->scm is pointed at an on-stack cookie when the caller supplied
 *	none, and the cookie is destroyed on every exit after scm_send()
 *	has succeeded.
 *
 *	Returns @len on success, otherwise a negative errno: -EOPNOTSUPP
 *	(MSG_OOB), -ENOTCONN (no name and no peer), -EMSGSIZE (len exceeds
 *	sk_sndbuf - 32), -ECONNRESET, -EPERM (unix_may_send() refused),
 *	-ECONNREFUSED (connected peer is dead), -EPIPE (receiver has
 *	RCV_SHUTDOWN), -EAGAIN (queue full and no timeout), the
 *	sock_intr_errno() value on a signal, or whatever a helper reported.
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: looked up later, under restart. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No destination given: use the connected peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* A SOCK_PASSCRED socket without an address is autobound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	/* A zero timeo is treated below as "do not wait". */
	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		/*
		 * With no name to look up we get here only after the dead-peer
		 * branch below cleared other.
		 */
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		/* Drops the reference taken by the lookup / unix_peer_get(). */
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			/* Our connected peer died: disconnect from it. */
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			/* Drops the reference the peer pointer was holding. */
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* The queue limit is not applied when other's peer is sk itself. */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/*
		 * Neither path after this call unlocks other, so
		 * unix_wait_for_peer() presumably drops the state lock
		 * itself - confirm against its definition.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		/* other is kept, so restart re-locks and re-checks it. */
		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1495 
1496 
/*
 *	Send @len bytes to the peer of a connected socket.
 *
 *	The data is split into skbs of at most (sk_sndbuf / 2) - 64 bytes,
 *	further capped by SKB_MAX_ALLOC and by the tailroom of the skb that
 *	was actually allocated.  Passed files travel with the first skb
 *	only.  A destination address is never accepted.
 *
 *	Returns the number of bytes queued when that is non-zero, otherwise
 *	a negative errno: -EOPNOTSUPP (MSG_OOB, or a name given on an
 *	unconnected socket), -EISCONN (a name given on a connected socket),
 *	-ENOTCONN (no peer), -EPIPE (send side shut down, peer dead or peer
 *	receive side shut down), or the error of a failing helper.  On the
 *	-EPIPE paths SIGPIPE is sent to the caller when nothing was sent
 *	and MSG_NOSIGNAL is not set.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		/* Read without taking a reference on the peer. */
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/* Only send the fds in the first buffer */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = true;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Re-checked for every chunk, under the peer's state lock. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* A partial send is reported as success with the byte count. */
	return sent ? : err;
}
1615 
1616 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1617 				  struct msghdr *msg, size_t len)
1618 {
1619 	int err;
1620 	struct sock *sk = sock->sk;
1621 
1622 	err = sock_error(sk);
1623 	if (err)
1624 		return err;
1625 
1626 	if (sk->sk_state != TCP_ESTABLISHED)
1627 		return -ENOTCONN;
1628 
1629 	if (msg->msg_namelen)
1630 		msg->msg_namelen = 0;
1631 
1632 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1633 }
1634 
1635 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1636 {
1637 	struct unix_sock *u = unix_sk(sk);
1638 
1639 	msg->msg_namelen = 0;
1640 	if (u->addr) {
1641 		msg->msg_namelen = u->addr->len;
1642 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1643 	}
1644 }
1645 
/*
 *	Receive one datagram.
 *
 *	u->readlock is held from before the dequeue until the skb has been
 *	released.  The copy is clipped to the skb's length; MSG_TRUNC is set
 *	in msg->msg_flags when the caller's buffer is shorter than the
 *	datagram.  Credentials, security data and any passed files are
 *	handed to scm_recv() through siocb->scm, which is pointed at a
 *	zeroed on-stack cookie when the caller supplied none.
 *
 *	Returns the number of bytes copied, or a negative errno:
 *	-EOPNOTSUPP for MSG_OOB, otherwise the error from
 *	skb_recv_datagram() or skb_copy_datagram_iovec().  A SEQPACKET
 *	socket whose receive side is shut down returns 0 instead of -EAGAIN.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* No sender address unless unix_copy_addr() fills one in below. */
	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* NOTE(review): presumably wakes senders blocked on a full queue - confirm. */
	wake_up_interruptible_sync(&u->peer_wait);

	/* skb->sk is the sock whose address is reported as the sender. */
	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Real read: the files leave the skb for the cookie. */
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1728 
/*
 *	Sleep until data has arrived. But check for races..
 */
1732 
/*
 * Block on sk's wait queue until the receive queue is non-empty, sk_err is
 * set, the receive side is shut down, a signal is pending, or the timeout
 * has run out.  Returns the remaining timeout as left by
 * schedule_timeout().
 */
static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		/* Go on the wait queue before testing the conditions. */
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* The state lock is dropped only while we actually sleep. */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
1760 
1761 
1762 
/*
 *	Receive up to @size bytes from a connected socket.
 *
 *	skbs are taken off the receive queue one at a time under
 *	u->readlock.  Data from skbs whose credentials differ from the
 *	first one read is never merged into the same call.  A partly
 *	consumed skb, and with MSG_PEEK every skb, is put back at the head
 *	of the queue.  Reading stops once an skb carrying passed files has
 *	been consumed.
 *
 *	Returns the number of bytes copied when that is non-zero (this
 *	includes -EFAULT stored in the counter when the very first copy
 *	fails), otherwise err: -EINVAL when the socket is not
 *	TCP_ESTABLISHED, -EOPNOTSUPP for MSG_OOB, a pending socket error,
 *	-EAGAIN when no data is queued and the timeout is zero, the
 *	sock_intr_errno() value on a signal, or 0 at receive shutdown.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* target: byte count that lets an empty queue end the call. */
	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* readlock is not held while sleeping. */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				/* readlock already dropped; scm_recv() is skipped. */
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy failed: requeue the skb untouched. */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after an skb that delivered passed files. */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	/* A non-zero copied count takes precedence over err. */
	return copied ? : err;
}
1905 
1906 static int unix_shutdown(struct socket *sock, int mode)
1907 {
1908 	struct sock *sk = sock->sk;
1909 	struct sock *other;
1910 
1911 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1912 
1913 	if (mode) {
1914 		unix_state_lock(sk);
1915 		sk->sk_shutdown |= mode;
1916 		other = unix_peer(sk);
1917 		if (other)
1918 			sock_hold(other);
1919 		unix_state_unlock(sk);
1920 		sk->sk_state_change(sk);
1921 
1922 		if (other &&
1923 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1924 
1925 			int peer_mode = 0;
1926 
1927 			if (mode&RCV_SHUTDOWN)
1928 				peer_mode |= SEND_SHUTDOWN;
1929 			if (mode&SEND_SHUTDOWN)
1930 				peer_mode |= RCV_SHUTDOWN;
1931 			unix_state_lock(other);
1932 			other->sk_shutdown |= peer_mode;
1933 			unix_state_unlock(other);
1934 			other->sk_state_change(other);
1935 			if (peer_mode == SHUTDOWN_MASK)
1936 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1937 			else if (peer_mode & RCV_SHUTDOWN)
1938 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1939 		}
1940 		if (other)
1941 			sock_put(other);
1942 	}
1943 	return 0;
1944 }
1945 
1946 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1947 {
1948 	struct sock *sk = sock->sk;
1949 	long amount = 0;
1950 	int err;
1951 
1952 	switch (cmd) {
1953 	case SIOCOUTQ:
1954 		amount = sk_wmem_alloc_get(sk);
1955 		err = put_user(amount, (int __user *)arg);
1956 		break;
1957 	case SIOCINQ:
1958 		{
1959 			struct sk_buff *skb;
1960 
1961 			if (sk->sk_state == TCP_LISTEN) {
1962 				err = -EINVAL;
1963 				break;
1964 			}
1965 
1966 			spin_lock(&sk->sk_receive_queue.lock);
1967 			if (sk->sk_type == SOCK_STREAM ||
1968 			    sk->sk_type == SOCK_SEQPACKET) {
1969 				skb_queue_walk(&sk->sk_receive_queue, skb)
1970 					amount += skb->len;
1971 			} else {
1972 				skb = skb_peek(&sk->sk_receive_queue);
1973 				if (skb)
1974 					amount = skb->len;
1975 			}
1976 			spin_unlock(&sk->sk_receive_queue.lock);
1977 			err = put_user(amount, (int __user *)arg);
1978 			break;
1979 		}
1980 
1981 	default:
1982 		err = -ENOIOCTLCMD;
1983 		break;
1984 	}
1985 	return err;
1986 }
1987 
1988 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1989 {
1990 	struct sock *sk = sock->sk;
1991 	unsigned int mask;
1992 
1993 	sock_poll_wait(file, sk_sleep(sk), wait);
1994 	mask = 0;
1995 
1996 	/* exceptional events? */
1997 	if (sk->sk_err)
1998 		mask |= POLLERR;
1999 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2000 		mask |= POLLHUP;
2001 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2002 		mask |= POLLRDHUP;
2003 
2004 	/* readable? */
2005 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2006 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2007 		mask |= POLLIN | POLLRDNORM;
2008 
2009 	/* Connection-based need to check for termination and startup */
2010 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2011 	    sk->sk_state == TCP_CLOSE)
2012 		mask |= POLLHUP;
2013 
2014 	/*
2015 	 * we set writable also when the other side has shut down the
2016 	 * connection. This prevents stuck sockets.
2017 	 */
2018 	if (unix_writable(sk))
2019 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2020 
2021 	return mask;
2022 }
2023 
2024 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2025 				    poll_table *wait)
2026 {
2027 	struct sock *sk = sock->sk, *other;
2028 	unsigned int mask, writable;
2029 
2030 	sock_poll_wait(file, sk_sleep(sk), wait);
2031 	mask = 0;
2032 
2033 	/* exceptional events? */
2034 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2035 		mask |= POLLERR;
2036 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2037 		mask |= POLLRDHUP;
2038 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2039 		mask |= POLLHUP;
2040 
2041 	/* readable? */
2042 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2043 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2044 		mask |= POLLIN | POLLRDNORM;
2045 
2046 	/* Connection-based need to check for termination and startup */
2047 	if (sk->sk_type == SOCK_SEQPACKET) {
2048 		if (sk->sk_state == TCP_CLOSE)
2049 			mask |= POLLHUP;
2050 		/* connection hasn't started yet? */
2051 		if (sk->sk_state == TCP_SYN_SENT)
2052 			return mask;
2053 	}
2054 
2055 	/* writable? */
2056 	writable = unix_writable(sk);
2057 	if (writable) {
2058 		other = unix_peer_get(sk);
2059 		if (other) {
2060 			if (unix_peer(other) != sk) {
2061 				sock_poll_wait(file, &unix_sk(other)->peer_wait,
2062 					  wait);
2063 				if (unix_recvq_full(other))
2064 					writable = 0;
2065 			}
2066 
2067 			sock_put(other);
2068 		}
2069 	}
2070 
2071 	if (writable)
2072 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2073 	else
2074 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2075 
2076 	return mask;
2077 }
2078 
2079 #ifdef CONFIG_PROC_FS
2080 static struct sock *first_unix_socket(int *i)
2081 {
2082 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2083 		if (!hlist_empty(&unix_socket_table[*i]))
2084 			return __sk_head(&unix_socket_table[*i]);
2085 	}
2086 	return NULL;
2087 }
2088 
2089 static struct sock *next_unix_socket(int *i, struct sock *s)
2090 {
2091 	struct sock *next = sk_next(s);
2092 	/* More in this chain? */
2093 	if (next)
2094 		return next;
2095 	/* Look for next non-empty chain. */
2096 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2097 		if (!hlist_empty(&unix_socket_table[*i]))
2098 			return __sk_head(&unix_socket_table[*i]);
2099 	}
2100 	return NULL;
2101 }
2102 
/*
 * Private state of one open seq_file; unix_seq_open() passes its size to
 * seq_open_net() and the seq callbacks read it back from seq->private.
 */
struct unix_iter_state {
	/* NOTE(review): presumably has to stay the first member for seq_file_net() - confirm */
	struct seq_net_private p;
	/* current unix_socket_table bucket, updated by first/next_unix_socket() */
	int i;
};
2107 
2108 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2109 {
2110 	struct unix_iter_state *iter = seq->private;
2111 	loff_t off = 0;
2112 	struct sock *s;
2113 
2114 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2115 		if (sock_net(s) != seq_file_net(seq))
2116 			continue;
2117 		if (off == pos)
2118 			return s;
2119 		++off;
2120 	}
2121 	return NULL;
2122 }
2123 
2124 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2125 	__acquires(unix_table_lock)
2126 {
2127 	spin_lock(&unix_table_lock);
2128 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2129 }
2130 
2131 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2132 {
2133 	struct unix_iter_state *iter = seq->private;
2134 	struct sock *sk = v;
2135 	++*pos;
2136 
2137 	if (v == SEQ_START_TOKEN)
2138 		sk = first_unix_socket(&iter->i);
2139 	else
2140 		sk = next_unix_socket(&iter->i, sk);
2141 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2142 		sk = next_unix_socket(&iter->i, sk);
2143 	return sk;
2144 }
2145 
2146 static void unix_seq_stop(struct seq_file *seq, void *v)
2147 	__releases(unix_table_lock)
2148 {
2149 	spin_unlock(&unix_table_lock);
2150 }
2151 
2152 static int unix_seq_show(struct seq_file *seq, void *v)
2153 {
2154 
2155 	if (v == SEQ_START_TOKEN)
2156 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2157 			 "Inode Path\n");
2158 	else {
2159 		struct sock *s = v;
2160 		struct unix_sock *u = unix_sk(s);
2161 		unix_state_lock(s);
2162 
2163 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2164 			s,
2165 			atomic_read(&s->sk_refcnt),
2166 			0,
2167 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2168 			s->sk_type,
2169 			s->sk_socket ?
2170 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2171 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2172 			sock_i_ino(s));
2173 
2174 		if (u->addr) {
2175 			int i, len;
2176 			seq_putc(seq, ' ');
2177 
2178 			i = 0;
2179 			len = u->addr->len - sizeof(short);
2180 			if (!UNIX_ABSTRACT(s))
2181 				len--;
2182 			else {
2183 				seq_putc(seq, '@');
2184 				i++;
2185 			}
2186 			for ( ; i < len; i++)
2187 				seq_putc(seq, u->addr->name->sun_path[i]);
2188 		}
2189 		unix_state_unlock(s);
2190 		seq_putc(seq, '\n');
2191 	}
2192 
2193 	return 0;
2194 }
2195 
2196 static const struct seq_operations unix_seq_ops = {
2197 	.start  = unix_seq_start,
2198 	.next   = unix_seq_next,
2199 	.stop   = unix_seq_stop,
2200 	.show   = unix_seq_show,
2201 };
2202 
2203 static int unix_seq_open(struct inode *inode, struct file *file)
2204 {
2205 	return seq_open_net(inode, file, &unix_seq_ops,
2206 			    sizeof(struct unix_iter_state));
2207 }
2208 
2209 static const struct file_operations unix_seq_fops = {
2210 	.owner		= THIS_MODULE,
2211 	.open		= unix_seq_open,
2212 	.read		= seq_read,
2213 	.llseek		= seq_lseek,
2214 	.release	= seq_release_net,
2215 };
2216 
2217 #endif
2218 
2219 static const struct net_proto_family unix_family_ops = {
2220 	.family = PF_UNIX,
2221 	.create = unix_create,
2222 	.owner	= THIS_MODULE,
2223 };
2224 
2225 
2226 static int __net_init unix_net_init(struct net *net)
2227 {
2228 	int error = -ENOMEM;
2229 
2230 	net->unx.sysctl_max_dgram_qlen = 10;
2231 	if (unix_sysctl_register(net))
2232 		goto out;
2233 
2234 #ifdef CONFIG_PROC_FS
2235 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2236 		unix_sysctl_unregister(net);
2237 		goto out;
2238 	}
2239 #endif
2240 	error = 0;
2241 out:
2242 	return error;
2243 }
2244 
2245 static void __net_exit unix_net_exit(struct net *net)
2246 {
2247 	unix_sysctl_unregister(net);
2248 	proc_net_remove(net, "unix");
2249 }
2250 
2251 static struct pernet_operations unix_net_ops = {
2252 	.init = unix_net_init,
2253 	.exit = unix_net_exit,
2254 };
2255 
2256 static int __init af_unix_init(void)
2257 {
2258 	int rc = -1;
2259 	struct sk_buff *dummy_skb;
2260 
2261 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2262 
2263 	rc = proto_register(&unix_proto, 1);
2264 	if (rc != 0) {
2265 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2266 		       __func__);
2267 		goto out;
2268 	}
2269 
2270 	sock_register(&unix_family_ops);
2271 	register_pernet_subsys(&unix_net_ops);
2272 out:
2273 	return rc;
2274 }
2275 
2276 static void __exit af_unix_exit(void)
2277 {
2278 	sock_unregister(PF_UNIX);
2279 	proto_unregister(&unix_proto);
2280 	unregister_pernet_subsys(&unix_net_ops);
2281 }
2282 
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
/* af_unix_init() runs at fs_initcall level; af_unix_exit() is the module exit hook. */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

/* Module metadata: licence, and the alias naming this module for PF_UNIX. */
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2292