xref: /openbmc/linux/net/unix/af_unix.c (revision b6dcefde)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid a huge amount
38  *					of hashed socks (this is for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skbs queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
/*
 * Global table of all AF_UNIX sockets, hashed by bound name.  The extra
 * slot at index UNIX_HASH_SIZE holds sockets not yet bound to any
 * address.
 */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table and each socket's hash linkage. */
static DEFINE_SPINLOCK(unix_table_lock);
/* Count of live AF_UNIX socks; capped in unix_create1(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Chain holding all not-yet-bound sockets. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/* True when sk is bound to an abstract (non-filesystem) name; a
 * filesystem bind stores the sentinel hash UNIX_HASH_SIZE (see
 * unix_bind()). */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security id into the skb's control area. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Copy the security id stored by unix_get_secdata() back into scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* No-op stubs when security networking is compiled out. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate rwlock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
/* Does osk consider sk to be its peer? */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
/*
 * Validate and canonicalize a unix socket name.
 *
 * Returns the canonical address length on success, or a negative errno.
 * For filesystem names (sun_path[0] != 0) the path is NUL-terminated in
 * place and the returned length covers family + path + NUL.  For
 * abstract names (leading 0 byte) *hashp receives the folded hash of
 * the raw bytes; note *hashp is left untouched on the filesystem path.
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	/* Must carry more than the family field, and fit in sockaddr_un. */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesnt as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hashed over all bytes, not NUL-terminated. */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
222 
/* Unhash sk from its chain; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227 
/* Add sk to the given hash chain; caller holds unix_table_lock. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
233 
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
240 
/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
247 
/*
 * Find a socket bound to the given name in namespace net.  Caller holds
 * unix_table_lock; no reference is taken on the result.  Returns the
 * socket or NULL.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	/* hash is xored with the socket type so stream and dgram sockets
	 * bound to the same name land on different chains. */
	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}
269 
/*
 * Lookup-by-name that takes unix_table_lock itself; on success the
 * returned socket carries an extra reference.
 */
static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
284 
/*
 * Find the socket bound to filesystem inode i in namespace net.
 * Returns the socket with a reference held, or NULL.
 */
static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	/* Filesystem-bound sockets hash by inode number (see unix_bind()). */
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (!net_eq(sock_net(s), net))
			continue;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
308 
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
/*
 * sk_write_space callback: wake sleeping writers and send SIGIO once
 * enough queued data has been consumed for the socket to be writable.
 * sk_callback_lock guards sk_sleep/sk_socket against concurrent detach.
 */
static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk_has_sleeper(sk))
			wake_up_interruptible_sync(sk->sk_sleep);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}
324 
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
/* Purge sk's queue of messages from its old peer `other` and, when the
 * old peer was connected back to us, report ECONNRESET to it. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Senders blocked on our previously-full queue may run. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
345 
/*
 * sk_destruct callback, run when the last reference to the sock is
 * dropped: purge queued skbs, release the bound address and update
 * global/per-net accounting.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
	/* sock_prot_inuse_add() requires BHs disabled on this CPU. */
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
#endif
}
372 
/*
 * Common teardown for an AF_UNIX sock.  embrion is 1 when releasing a
 * never-accepted child found on a dying listener's queue; the peer then
 * sees ECONNRESET.  Always returns 0.
 */
static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Detach the filesystem binding under the lock; the dput/mntput
	 * happen below, outside it. */
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryonic connections; release
		 * each queued child socket as well. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}
451 
/*
 * listen(2): move a bound stream/seqpacket socket into TCP_LISTEN and
 * record the caller's credentials for SO_PEERCRED on accepted sockets.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* A growing backlog may unblock connectors sleeping in
	 * unix_wait_for_peer(). */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}
481 
/* Forward declarations needed by the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
506 
/* Operations for SOCK_STREAM sockets: connection-oriented byte stream. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
527 
/* Operations for SOCK_DGRAM sockets: connectionless datagrams; no
 * accept/listen. */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
548 
/* Operations for SOCK_SEQPACKET: connection-oriented like stream, but
 * with datagram-style send/recv/poll handlers. */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
569 
/* Single struct proto shared by all AF_UNIX socket types. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
575 
576 /*
577  * AF_UNIX sockets do not interact with hardware, hence they
578  * dont trigger interrupts - so it's safe for them to have
579  * bh-unsafe locking for their sk_receive_queue.lock. Split off
580  * this special lock-class by reinitializing the spinlock key:
581  */
582 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583 
/*
 * Allocate and initialize one AF_UNIX sock, accounted against the
 * global 2*get_max_files() limit.  Returns the new sock (already placed
 * on the unbound list) or NULL on failure.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* Optimistically claim a slot; undone at "out" if we fail. */
	atomic_inc(&unix_nr_socks);
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	/* See the comment on af_unix_sk_receive_queue_lock_key above:
	 * AF_UNIX never touches this queue from interrupt context. */
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);
	else {
		/* sock_prot_inuse_add() requires BHs disabled. */
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
623 
/*
 * PF_UNIX socket(2) handler: select the proto_ops matching sock->type
 * and allocate the underlying sock.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through - SOCK_RAW is treated as SOCK_DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}
654 
655 static int unix_release(struct socket *sock)
656 {
657 	struct sock *sk = sock->sk;
658 
659 	if (!sk)
660 		return 0;
661 
662 	sock->sk = NULL;
663 
664 	return unix_release_sock(sk, 0);
665 }
666 
667 static int unix_autobind(struct socket *sock)
668 {
669 	struct sock *sk = sock->sk;
670 	struct net *net = sock_net(sk);
671 	struct unix_sock *u = unix_sk(sk);
672 	static u32 ordernum = 1;
673 	struct unix_address *addr;
674 	int err;
675 
676 	mutex_lock(&u->readlock);
677 
678 	err = 0;
679 	if (u->addr)
680 		goto out;
681 
682 	err = -ENOMEM;
683 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
684 	if (!addr)
685 		goto out;
686 
687 	addr->name->sun_family = AF_UNIX;
688 	atomic_set(&addr->refcnt, 1);
689 
690 retry:
691 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
692 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
693 
694 	spin_lock(&unix_table_lock);
695 	ordernum = (ordernum+1)&0xFFFFF;
696 
697 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
698 				      addr->hash)) {
699 		spin_unlock(&unix_table_lock);
700 		/* Sanity yield. It is unusual case, but yet... */
701 		if (!(ordernum&0xFF))
702 			yield();
703 		goto retry;
704 	}
705 	addr->hash ^= sk->sk_type;
706 
707 	__unix_remove_socket(sk);
708 	u->addr = addr;
709 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
710 	spin_unlock(&unix_table_lock);
711 	err = 0;
712 
713 out:	mutex_unlock(&u->readlock);
714 	return err;
715 }
716 
/*
 * Resolve a destination address to a live socket for connect/sendto.
 * Filesystem names go through a path lookup plus write-permission
 * check; abstract names use the hash table.  On success returns the
 * target with a reference held; on failure returns NULL and stores a
 * negative errno in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(net, inode);
		if (!u)
			goto put_fail;

		/* Mimic the atime update a read of the node would cause. */
		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		/* Right name, wrong socket type (e.g. stream vs dgram). */
		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
771 
772 
/*
 * bind(2): attach a name to the socket.  A bare-family address triggers
 * autobind; a filesystem name creates a socket inode via vfs_mknod();
 * an abstract name is inserted into the hash table directly.  The
 * error-unwind labels at the bottom mirror the mknod setup sequence.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family-only address: pick an abstract name automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;	/* already bound */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		/* Filesystem binds use the sentinel hash (see
		 * UNIX_ABSTRACT()); the socket hashes by inode below. */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract name: fail if someone bound it meanwhile. */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	/* A pre-existing filesystem node means the address is taken. */
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
895 
/*
 * Take both sockets' state locks in a deadlock-free (address) order.
 * A NULL or identical second socket degenerates to a single lock.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
910 
/* Counterpart of unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
920 
/*
 * connect(2) for datagram sockets: set (or change) the default peer.
 * AF_UNSPEC disconnects, per 1003.1g.  When the peer changes, the old
 * peer is notified via unix_dgram_disconnected().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED requires an address to pass with creds. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
994 
/*
 * Sleep on other's peer_wait queue until its receive queue has room,
 * it dies/shuts down, or the timeout expires.  Called with other's
 * state lock held; the lock is dropped before sleeping and NOT
 * re-taken.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Only actually sleep while the peer is alive, readable and full;
	 * the condition is sampled under other's state lock. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1015 
/*
 * unix_stream_connect - connect a SOCK_STREAM/SOCK_SEQPACKET socket to
 * a listening peer.
 *
 * The embryonic server-side sock and a one-byte notification skb are
 * allocated up front, before any state locks are taken, so the locked
 * section cannot fail on allocation.  Returns 0 on success or a
 * negative errno (-ECONNREFUSED, -EAGAIN, -EISCONN, ...).
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* If we pass credentials but have no address yet, autobind first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	/* Listener backlog is full: wait for room, or fail if non-blocking. */
	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* Raced with another connect: start over with fresh state. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* queue the embryonic sock and notify the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1191 
1192 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1193 {
1194 	struct sock *ska = socka->sk, *skb = sockb->sk;
1195 
1196 	/* Join our sockets back to back */
1197 	sock_hold(ska);
1198 	sock_hold(skb);
1199 	unix_peer(ska) = skb;
1200 	unix_peer(skb) = ska;
1201 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1202 	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1203 	ska->sk_peercred.uid = skb->sk_peercred.uid;
1204 	ska->sk_peercred.gid = skb->sk_peercred.gid;
1205 
1206 	if (ska->sk_type != SOCK_DGRAM) {
1207 		ska->sk_state = TCP_ESTABLISHED;
1208 		skb->sk_state = TCP_ESTABLISHED;
1209 		socka->state  = SS_CONNECTED;
1210 		sockb->state  = SS_CONNECTED;
1211 	}
1212 	return 0;
1213 }
1214 
1215 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1216 {
1217 	struct sock *sk = sock->sk;
1218 	struct sock *tsk;
1219 	struct sk_buff *skb;
1220 	int err;
1221 
1222 	err = -EOPNOTSUPP;
1223 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1224 		goto out;
1225 
1226 	err = -EINVAL;
1227 	if (sk->sk_state != TCP_LISTEN)
1228 		goto out;
1229 
1230 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1231 	 * so that no locks are necessary.
1232 	 */
1233 
1234 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1235 	if (!skb) {
1236 		/* This means receive shutdown. */
1237 		if (err == 0)
1238 			err = -EINVAL;
1239 		goto out;
1240 	}
1241 
1242 	tsk = skb->sk;
1243 	skb_free_datagram(sk, skb);
1244 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1245 
1246 	/* attach accepted sock to socket */
1247 	unix_state_lock(tsk);
1248 	newsock->state = SS_CONNECTED;
1249 	sock_graft(tsk, newsock);
1250 	unix_state_unlock(tsk);
1251 	return 0;
1252 
1253 out:
1254 	return err;
1255 }
1256 
1257 
1258 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1259 {
1260 	struct sock *sk = sock->sk;
1261 	struct unix_sock *u;
1262 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1263 	int err = 0;
1264 
1265 	if (peer) {
1266 		sk = unix_peer_get(sk);
1267 
1268 		err = -ENOTCONN;
1269 		if (!sk)
1270 			goto out;
1271 		err = 0;
1272 	} else {
1273 		sock_hold(sk);
1274 	}
1275 
1276 	u = unix_sk(sk);
1277 	unix_state_lock(sk);
1278 	if (!u->addr) {
1279 		sunaddr->sun_family = AF_UNIX;
1280 		sunaddr->sun_path[0] = 0;
1281 		*uaddr_len = sizeof(short);
1282 	} else {
1283 		struct unix_address *addr = u->addr;
1284 
1285 		*uaddr_len = addr->len;
1286 		memcpy(sunaddr, addr->name, *uaddr_len);
1287 	}
1288 	unix_state_unlock(sk);
1289 	sock_put(sk);
1290 out:
1291 	return err;
1292 }
1293 
1294 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1295 {
1296 	int i;
1297 
1298 	scm->fp = UNIXCB(skb).fp;
1299 	skb->destructor = sock_wfree;
1300 	UNIXCB(skb).fp = NULL;
1301 
1302 	for (i = scm->fp->count-1; i >= 0; i--)
1303 		unix_notinflight(scm->fp->fp[i]);
1304 }
1305 
/*
 * unix_destruct_fds - skb destructor for skbs carrying SCM_RIGHTS fds.
 * Detaches the fd list into a throw-away scm cookie, drops the file
 * references, then releases the skb's write-memory accounting.
 */
static void unix_destruct_fds(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}
1317 
1318 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1319 {
1320 	int i;
1321 
1322 	/*
1323 	 * Need to duplicate file references for the sake of garbage
1324 	 * collection.  Otherwise a socket in the fps might become a
1325 	 * candidate for GC while the skb is not yet queued.
1326 	 */
1327 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1328 	if (!UNIXCB(skb).fp)
1329 		return -ENOMEM;
1330 
1331 	for (i = scm->fp->count-1; i >= 0; i--)
1332 		unix_inflight(scm->fp->fp[i]);
1333 	skb->destructor = unix_destruct_fds;
1334 	return 0;
1335 }
1336 
1337 /*
1338  *	Send AF_UNIX data.
1339  */
1340 
/*
 * unix_dgram_sendmsg - send one datagram (SOCK_DGRAM; also used by
 * SOCK_SEQPACKET through unix_seqpacket_sendmsg).
 *
 * The destination is taken from msg->msg_name, or from the connected
 * peer when no name is given.  Returns the byte count sent or a
 * negative errno.
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* Throttle senders while the fd garbage collector runs. */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No name given: fall back to the connected peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	/* Leave headroom under sk_sndbuf for bookkeeping overhead. */
	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			/* Our connected peer died: drop the link. */
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* Receiver queue full and we are not its connected peer: wait. */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1494 
1495 
/*
 * unix_stream_sendmsg - send data on a connected SOCK_STREAM socket.
 *
 * The payload is split into skbs sized to roughly half the send buffer
 * (capped at SKB_MAX_ALLOC) and queued directly onto the peer's receive
 * queue.  Any SCM_RIGHTS fds ride only on the first skb.  Returns the
 * number of bytes sent (possibly short) or a negative errno; raises
 * SIGPIPE on a broken pipe unless MSG_NOSIGNAL is set.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* Throttle senders while the fd garbage collector runs. */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* Stream sockets cannot take a destination address. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/* Only send the fds in the first buffer */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = true;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer died or shut down reading: deliver SIGPIPE/-EPIPE. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* A short write still reports the bytes that did go out. */
	return sent ? : err;
}
1614 
1615 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1616 				  struct msghdr *msg, size_t len)
1617 {
1618 	int err;
1619 	struct sock *sk = sock->sk;
1620 
1621 	err = sock_error(sk);
1622 	if (err)
1623 		return err;
1624 
1625 	if (sk->sk_state != TCP_ESTABLISHED)
1626 		return -ENOTCONN;
1627 
1628 	if (msg->msg_namelen)
1629 		msg->msg_namelen = 0;
1630 
1631 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1632 }
1633 
1634 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1635 {
1636 	struct unix_sock *u = unix_sk(sk);
1637 
1638 	msg->msg_namelen = 0;
1639 	if (u->addr) {
1640 		msg->msg_namelen = u->addr->len;
1641 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1642 	}
1643 }
1644 
/*
 * unix_dgram_recvmsg - receive one datagram.
 *
 * Serialized against other readers by u->readlock.  Copies the sender's
 * address and credentials, and either detaches (normal read) or clones
 * (MSG_PEEK) any passed fds.  Returns the datagram size (MSG_TRUNC set
 * if the buffer was smaller) or a negative errno.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* We dequeued: tell blocked senders a slot has opened up. */
	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	siocb->scm->creds = *UNIXCREDS(skb);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1727 
/*
 *	Sleep until data has arrived. But check for races..
 */
1731 
/*
 * unix_stream_data_wait - sleep until data arrives on @sk, for at most
 * @timeo jiffies.  Returns the remaining timeout.  Takes and releases
 * the socket state lock around the sleep itself.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		/* Wake on data, error, shutdown, signal or expired timeout. */
		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk->sk_sleep, &wait);
	unix_state_unlock(sk);
	return timeo;
}
1759 
1760 
1761 
/*
 * unix_stream_recvmsg - receive data on a connected SOCK_STREAM socket.
 *
 * Dequeues skbs until @target (SO_RCVLOWAT) bytes are copied or the
 * stream ends.  Never glues data written under different credentials
 * into one read, and stops after any skb that carried fds.  Returns
 * bytes copied, 0 on EOF, or a negative errno.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Drop readlock while sleeping so writers can queue. */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy fault: requeue and report what we have. */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop once fds were received so they are delivered. */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1904 
/*
 * unix_shutdown - shut down one or both directions of the socket.
 *
 * @mode is the userspace SHUT_* value; the +1 remaps it onto the
 * RCV_SHUTDOWN/SEND_SHUTDOWN bit mask.  For connection-oriented types
 * the mirrored shutdown bits are also set on the peer, which is then
 * woken/notified.  Always returns 0.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (mode) {
		unix_state_lock(sk);
		sk->sk_shutdown |= mode;
		other = unix_peer(sk);
		if (other)
			sock_hold(other);
		unix_state_unlock(sk);
		sk->sk_state_change(sk);

		if (other &&
			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

			int peer_mode = 0;

			/* Our RCV shutdown is the peer's SEND shutdown
			 * and vice versa. */
			if (mode&RCV_SHUTDOWN)
				peer_mode |= SEND_SHUTDOWN;
			if (mode&SEND_SHUTDOWN)
				peer_mode |= RCV_SHUTDOWN;
			unix_state_lock(other);
			other->sk_shutdown |= peer_mode;
			unix_state_unlock(other);
			other->sk_state_change(other);
			read_lock(&other->sk_callback_lock);
			if (peer_mode == SHUTDOWN_MASK)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
			else if (peer_mode & RCV_SHUTDOWN)
				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
			read_unlock(&other->sk_callback_lock);
		}
		if (other)
			sock_put(other);
	}
	return 0;
}
1946 
1947 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1948 {
1949 	struct sock *sk = sock->sk;
1950 	long amount = 0;
1951 	int err;
1952 
1953 	switch (cmd) {
1954 	case SIOCOUTQ:
1955 		amount = sk_wmem_alloc_get(sk);
1956 		err = put_user(amount, (int __user *)arg);
1957 		break;
1958 	case SIOCINQ:
1959 		{
1960 			struct sk_buff *skb;
1961 
1962 			if (sk->sk_state == TCP_LISTEN) {
1963 				err = -EINVAL;
1964 				break;
1965 			}
1966 
1967 			spin_lock(&sk->sk_receive_queue.lock);
1968 			if (sk->sk_type == SOCK_STREAM ||
1969 			    sk->sk_type == SOCK_SEQPACKET) {
1970 				skb_queue_walk(&sk->sk_receive_queue, skb)
1971 					amount += skb->len;
1972 			} else {
1973 				skb = skb_peek(&sk->sk_receive_queue);
1974 				if (skb)
1975 					amount = skb->len;
1976 			}
1977 			spin_unlock(&sk->sk_receive_queue.lock);
1978 			err = put_user(amount, (int __user *)arg);
1979 			break;
1980 		}
1981 
1982 	default:
1983 		err = -ENOIOCTLCMD;
1984 		break;
1985 	}
1986 	return err;
1987 }
1988 
1989 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1990 {
1991 	struct sock *sk = sock->sk;
1992 	unsigned int mask;
1993 
1994 	sock_poll_wait(file, sk->sk_sleep, wait);
1995 	mask = 0;
1996 
1997 	/* exceptional events? */
1998 	if (sk->sk_err)
1999 		mask |= POLLERR;
2000 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2001 		mask |= POLLHUP;
2002 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2003 		mask |= POLLRDHUP;
2004 
2005 	/* readable? */
2006 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2007 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2008 		mask |= POLLIN | POLLRDNORM;
2009 
2010 	/* Connection-based need to check for termination and startup */
2011 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2012 	    sk->sk_state == TCP_CLOSE)
2013 		mask |= POLLHUP;
2014 
2015 	/*
2016 	 * we set writable also when the other side has shut down the
2017 	 * connection. This prevents stuck sockets.
2018 	 */
2019 	if (unix_writable(sk))
2020 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2021 
2022 	return mask;
2023 }
2024 
/*
 * unix_dgram_poll - poll mask for datagram (and seqpacket) sockets.
 * Writability additionally depends on the peer's receive queue having
 * room, so the poller is also registered on the peer's peer_wait queue.
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk->sk_sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);
		if (other) {
			/* Asymmetric peering: also wait on the peer's
			 * queue and honour its backlog limit. */
			if (unix_peer(other) != sk) {
				sock_poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
2079 
2080 #ifdef CONFIG_PROC_FS
2081 static struct sock *first_unix_socket(int *i)
2082 {
2083 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2084 		if (!hlist_empty(&unix_socket_table[*i]))
2085 			return __sk_head(&unix_socket_table[*i]);
2086 	}
2087 	return NULL;
2088 }
2089 
2090 static struct sock *next_unix_socket(int *i, struct sock *s)
2091 {
2092 	struct sock *next = sk_next(s);
2093 	/* More in this chain? */
2094 	if (next)
2095 		return next;
2096 	/* Look for next non-empty chain. */
2097 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2098 		if (!hlist_empty(&unix_socket_table[*i]))
2099 			return __sk_head(&unix_socket_table[*i]);
2100 	}
2101 	return NULL;
2102 }
2103 
/* Per-open iterator state for /proc/net/unix. */
struct unix_iter_state {
	struct seq_net_private p;	/* must be first for seq_file_net() */
	int i;				/* current hash bucket index */
};
2108 
2109 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2110 {
2111 	struct unix_iter_state *iter = seq->private;
2112 	loff_t off = 0;
2113 	struct sock *s;
2114 
2115 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2116 		if (sock_net(s) != seq_file_net(seq))
2117 			continue;
2118 		if (off == pos)
2119 			return s;
2120 		++off;
2121 	}
2122 	return NULL;
2123 }
2124 
2125 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2126 	__acquires(unix_table_lock)
2127 {
2128 	spin_lock(&unix_table_lock);
2129 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2130 }
2131 
2132 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2133 {
2134 	struct unix_iter_state *iter = seq->private;
2135 	struct sock *sk = v;
2136 	++*pos;
2137 
2138 	if (v == SEQ_START_TOKEN)
2139 		sk = first_unix_socket(&iter->i);
2140 	else
2141 		sk = next_unix_socket(&iter->i, sk);
2142 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2143 		sk = next_unix_socket(&iter->i, sk);
2144 	return sk;
2145 }
2146 
/* Release the table lock taken by unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2152 
/*
 * unix_seq_show - emit one /proc/net/unix row (or the header).
 * Abstract-namespace paths are shown with a leading '@'.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		/* Protocol column is always 0 for AF_UNIX. */
		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Address length includes the sun_family word. */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				/* Abstract names start with a NUL byte;
				 * print '@' in its place. */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2196 
/* seq_file iterator callbacks for /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2203 
/* open() handler for /proc/net/unix: namespace-aware seq_file setup. */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2209 
/* file_operations for /proc/net/unix. */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2217 
2218 #endif
2219 
/* Registration record for the PF_UNIX protocol family. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2225 
2226 
2227 static int unix_net_init(struct net *net)
2228 {
2229 	int error = -ENOMEM;
2230 
2231 	net->unx.sysctl_max_dgram_qlen = 10;
2232 	if (unix_sysctl_register(net))
2233 		goto out;
2234 
2235 #ifdef CONFIG_PROC_FS
2236 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2237 		unix_sysctl_unregister(net);
2238 		goto out;
2239 	}
2240 #endif
2241 	error = 0;
2242 out:
2243 	return error;
2244 }
2245 
/* Per-namespace teardown: undo unix_net_init(). */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2251 
/* Per-network-namespace init/exit hooks for AF_UNIX. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2256 
2257 static int __init af_unix_init(void)
2258 {
2259 	int rc = -1;
2260 	struct sk_buff *dummy_skb;
2261 
2262 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2263 
2264 	rc = proto_register(&unix_proto, 1);
2265 	if (rc != 0) {
2266 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2267 		       __func__);
2268 		goto out;
2269 	}
2270 
2271 	sock_register(&unix_family_ops);
2272 	register_pernet_subsys(&unix_net_ops);
2273 out:
2274 	return rc;
2275 }
2276 
/* Module exit: unregister everything af_unix_init() set up. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2283 
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there. */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2293