xref: /openbmc/linux/net/unix/af_unix.c (revision 4bdf0bb7)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 static DEFINE_SPINLOCK(unix_table_lock);
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121 
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security ID carried in @scm into the skb's UNIXSID slot. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Copy the security ID stored in the skb's UNIXSID slot back into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143 
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by a separate spinlock.
 */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len] = 0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220 	return len;
221 }
222 
/*
 * Unhash @sk from whichever bucket it is on.  This helper does no
 * locking of its own.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227 
/*
 * Hash @sk onto @list, warning if it is still hashed somewhere.  This
 * helper does no locking of its own.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
314 static void unix_write_space(struct sock *sk)
315 {
316 	read_lock(&sk->sk_callback_lock);
317 	if (unix_writable(sk)) {
318 		if (sk_has_sleeper(sk))
319 			wake_up_interruptible_sync(sk->sk_sleep);
320 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321 	}
322 	read_unlock(&sk->sk_callback_lock);
323 }
324 
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332 		skb_queue_purge(&sk->sk_receive_queue);
333 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334 
335 		/* If one link of bidirectional dgram pipe is disconnected,
336 		 * we signal error. Messages are lost. Do not make this,
337 		 * when peer was not connected to us.
338 		 */
339 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340 			other->sk_err = ECONNRESET;
341 			other->sk_error_report(other);
342 		}
343 	}
344 }
345 
346 static void unix_sock_destructor(struct sock *sk)
347 {
348 	struct unix_sock *u = unix_sk(sk);
349 
350 	skb_queue_purge(&sk->sk_receive_queue);
351 
352 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353 	WARN_ON(!sk_unhashed(sk));
354 	WARN_ON(sk->sk_socket);
355 	if (!sock_flag(sk, SOCK_DEAD)) {
356 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357 		return;
358 	}
359 
360 	if (u->addr)
361 		unix_release_addr(u->addr);
362 
363 	atomic_dec(&unix_nr_socks);
364 	local_bh_disable();
365 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 	local_bh_enable();
367 #ifdef UNIX_REFCNT_DEBUG
368 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369 		atomic_read(&unix_nr_socks));
370 #endif
371 }
372 
373 static int unix_release_sock(struct sock *sk, int embrion)
374 {
375 	struct unix_sock *u = unix_sk(sk);
376 	struct dentry *dentry;
377 	struct vfsmount *mnt;
378 	struct sock *skpair;
379 	struct sk_buff *skb;
380 	int state;
381 
382 	unix_remove_socket(sk);
383 
384 	/* Clear state */
385 	unix_state_lock(sk);
386 	sock_orphan(sk);
387 	sk->sk_shutdown = SHUTDOWN_MASK;
388 	dentry	     = u->dentry;
389 	u->dentry    = NULL;
390 	mnt	     = u->mnt;
391 	u->mnt	     = NULL;
392 	state = sk->sk_state;
393 	sk->sk_state = TCP_CLOSE;
394 	unix_state_unlock(sk);
395 
396 	wake_up_interruptible_all(&u->peer_wait);
397 
398 	skpair = unix_peer(sk);
399 
400 	if (skpair != NULL) {
401 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
402 			unix_state_lock(skpair);
403 			/* No more writes */
404 			skpair->sk_shutdown = SHUTDOWN_MASK;
405 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406 				skpair->sk_err = ECONNRESET;
407 			unix_state_unlock(skpair);
408 			skpair->sk_state_change(skpair);
409 			read_lock(&skpair->sk_callback_lock);
410 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411 			read_unlock(&skpair->sk_callback_lock);
412 		}
413 		sock_put(skpair); /* It may now die */
414 		unix_peer(sk) = NULL;
415 	}
416 
417 	/* Try to flush out this socket. Throw out buffers at least */
418 
419 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
420 		if (state == TCP_LISTEN)
421 			unix_release_sock(skb->sk, 1);
422 		/* passed fds are erased in the kfree_skb hook	      */
423 		kfree_skb(skb);
424 	}
425 
426 	if (dentry) {
427 		dput(dentry);
428 		mntput(mnt);
429 	}
430 
431 	sock_put(sk);
432 
433 	/* ---- Socket is dead now and most probably destroyed ---- */
434 
435 	/*
436 	 * Fixme: BSD difference: In BSD all sockets connected to use get
437 	 *	  ECONNRESET and we die on the spot. In Linux we behave
438 	 *	  like files and pipes do and wait for the last
439 	 *	  dereference.
440 	 *
441 	 * Can't we simply set sock->err?
442 	 *
443 	 *	  What the above comment does talk about? --ANK(980817)
444 	 */
445 
446 	if (unix_tot_inflight)
447 		unix_gc();		/* Garbage collect fds */
448 
449 	return 0;
450 }
451 
452 static int unix_listen(struct socket *sock, int backlog)
453 {
454 	int err;
455 	struct sock *sk = sock->sk;
456 	struct unix_sock *u = unix_sk(sk);
457 
458 	err = -EOPNOTSUPP;
459 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460 		goto out;	/* Only stream/seqpacket sockets accept */
461 	err = -EINVAL;
462 	if (!u->addr)
463 		goto out;	/* No listens on an unbound socket */
464 	unix_state_lock(sk);
465 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466 		goto out_unlock;
467 	if (backlog > sk->sk_max_ack_backlog)
468 		wake_up_interruptible_all(&u->peer_wait);
469 	sk->sk_max_ack_backlog	= backlog;
470 	sk->sk_state		= TCP_LISTEN;
471 	/* set credentials so connect can copy them */
472 	sk->sk_peercred.pid	= task_tgid_vnr(current);
473 	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474 	err = 0;
475 
476 out_unlock:
477 	unix_state_unlock(sk);
478 out:
479 	return err;
480 }
481 
482 static int unix_release(struct socket *);
483 static int unix_bind(struct socket *, struct sockaddr *, int);
484 static int unix_stream_connect(struct socket *, struct sockaddr *,
485 			       int addr_len, int flags);
486 static int unix_socketpair(struct socket *, struct socket *);
487 static int unix_accept(struct socket *, struct socket *, int);
488 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490 static unsigned int unix_dgram_poll(struct file *, struct socket *,
491 				    poll_table *);
492 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493 static int unix_shutdown(struct socket *, int);
494 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495 			       struct msghdr *, size_t);
496 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497 			       struct msghdr *, size_t, int);
498 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499 			      struct msghdr *, size_t);
500 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501 			      struct msghdr *, size_t, int);
502 static int unix_dgram_connect(struct socket *, struct sockaddr *,
503 			      int, int);
504 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505 				  struct msghdr *, size_t);
506 
507 static const struct proto_ops unix_stream_ops = {
508 	.family =	PF_UNIX,
509 	.owner =	THIS_MODULE,
510 	.release =	unix_release,
511 	.bind =		unix_bind,
512 	.connect =	unix_stream_connect,
513 	.socketpair =	unix_socketpair,
514 	.accept =	unix_accept,
515 	.getname =	unix_getname,
516 	.poll =		unix_poll,
517 	.ioctl =	unix_ioctl,
518 	.listen =	unix_listen,
519 	.shutdown =	unix_shutdown,
520 	.setsockopt =	sock_no_setsockopt,
521 	.getsockopt =	sock_no_getsockopt,
522 	.sendmsg =	unix_stream_sendmsg,
523 	.recvmsg =	unix_stream_recvmsg,
524 	.mmap =		sock_no_mmap,
525 	.sendpage =	sock_no_sendpage,
526 };
527 
528 static const struct proto_ops unix_dgram_ops = {
529 	.family =	PF_UNIX,
530 	.owner =	THIS_MODULE,
531 	.release =	unix_release,
532 	.bind =		unix_bind,
533 	.connect =	unix_dgram_connect,
534 	.socketpair =	unix_socketpair,
535 	.accept =	sock_no_accept,
536 	.getname =	unix_getname,
537 	.poll =		unix_dgram_poll,
538 	.ioctl =	unix_ioctl,
539 	.listen =	sock_no_listen,
540 	.shutdown =	unix_shutdown,
541 	.setsockopt =	sock_no_setsockopt,
542 	.getsockopt =	sock_no_getsockopt,
543 	.sendmsg =	unix_dgram_sendmsg,
544 	.recvmsg =	unix_dgram_recvmsg,
545 	.mmap =		sock_no_mmap,
546 	.sendpage =	sock_no_sendpage,
547 };
548 
549 static const struct proto_ops unix_seqpacket_ops = {
550 	.family =	PF_UNIX,
551 	.owner =	THIS_MODULE,
552 	.release =	unix_release,
553 	.bind =		unix_bind,
554 	.connect =	unix_stream_connect,
555 	.socketpair =	unix_socketpair,
556 	.accept =	unix_accept,
557 	.getname =	unix_getname,
558 	.poll =		unix_dgram_poll,
559 	.ioctl =	unix_ioctl,
560 	.listen =	unix_listen,
561 	.shutdown =	unix_shutdown,
562 	.setsockopt =	sock_no_setsockopt,
563 	.getsockopt =	sock_no_getsockopt,
564 	.sendmsg =	unix_seqpacket_sendmsg,
565 	.recvmsg =	unix_dgram_recvmsg,
566 	.mmap =		sock_no_mmap,
567 	.sendpage =	sock_no_sendpage,
568 };
569 
570 static struct proto unix_proto = {
571 	.name			= "UNIX",
572 	.owner			= THIS_MODULE,
573 	.obj_size		= sizeof(struct unix_sock),
574 };
575 
576 /*
577  * AF_UNIX sockets do not interact with hardware, hence they
578  * dont trigger interrupts - so it's safe for them to have
579  * bh-unsafe locking for their sk_receive_queue.lock. Split off
580  * this special lock-class by reinitializing the spinlock key:
581  */
582 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583 
584 static struct sock *unix_create1(struct net *net, struct socket *sock)
585 {
586 	struct sock *sk = NULL;
587 	struct unix_sock *u;
588 
589 	atomic_inc(&unix_nr_socks);
590 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591 		goto out;
592 
593 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594 	if (!sk)
595 		goto out;
596 
597 	sock_init_data(sock, sk);
598 	lockdep_set_class(&sk->sk_receive_queue.lock,
599 				&af_unix_sk_receive_queue_lock_key);
600 
601 	sk->sk_write_space	= unix_write_space;
602 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603 	sk->sk_destruct		= unix_sock_destructor;
604 	u	  = unix_sk(sk);
605 	u->dentry = NULL;
606 	u->mnt	  = NULL;
607 	spin_lock_init(&u->lock);
608 	atomic_long_set(&u->inflight, 0);
609 	INIT_LIST_HEAD(&u->link);
610 	mutex_init(&u->readlock); /* single task reading lock */
611 	init_waitqueue_head(&u->peer_wait);
612 	unix_insert_socket(unix_sockets_unbound, sk);
613 out:
614 	if (sk == NULL)
615 		atomic_dec(&unix_nr_socks);
616 	else {
617 		local_bh_disable();
618 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619 		local_bh_enable();
620 	}
621 	return sk;
622 }
623 
624 static int unix_create(struct net *net, struct socket *sock, int protocol)
625 {
626 	if (protocol && protocol != PF_UNIX)
627 		return -EPROTONOSUPPORT;
628 
629 	sock->state = SS_UNCONNECTED;
630 
631 	switch (sock->type) {
632 	case SOCK_STREAM:
633 		sock->ops = &unix_stream_ops;
634 		break;
635 		/*
636 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
637 		 *	nothing uses it.
638 		 */
639 	case SOCK_RAW:
640 		sock->type = SOCK_DGRAM;
641 	case SOCK_DGRAM:
642 		sock->ops = &unix_dgram_ops;
643 		break;
644 	case SOCK_SEQPACKET:
645 		sock->ops = &unix_seqpacket_ops;
646 		break;
647 	default:
648 		return -ESOCKTNOSUPPORT;
649 	}
650 
651 	return unix_create1(net, sock) ? 0 : -ENOMEM;
652 }
653 
654 static int unix_release(struct socket *sock)
655 {
656 	struct sock *sk = sock->sk;
657 
658 	if (!sk)
659 		return 0;
660 
661 	sock->sk = NULL;
662 
663 	return unix_release_sock(sk, 0);
664 }
665 
666 static int unix_autobind(struct socket *sock)
667 {
668 	struct sock *sk = sock->sk;
669 	struct net *net = sock_net(sk);
670 	struct unix_sock *u = unix_sk(sk);
671 	static u32 ordernum = 1;
672 	struct unix_address *addr;
673 	int err;
674 
675 	mutex_lock(&u->readlock);
676 
677 	err = 0;
678 	if (u->addr)
679 		goto out;
680 
681 	err = -ENOMEM;
682 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683 	if (!addr)
684 		goto out;
685 
686 	addr->name->sun_family = AF_UNIX;
687 	atomic_set(&addr->refcnt, 1);
688 
689 retry:
690 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692 
693 	spin_lock(&unix_table_lock);
694 	ordernum = (ordernum+1)&0xFFFFF;
695 
696 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697 				      addr->hash)) {
698 		spin_unlock(&unix_table_lock);
699 		/* Sanity yield. It is unusual case, but yet... */
700 		if (!(ordernum&0xFF))
701 			yield();
702 		goto retry;
703 	}
704 	addr->hash ^= sk->sk_type;
705 
706 	__unix_remove_socket(sk);
707 	u->addr = addr;
708 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
709 	spin_unlock(&unix_table_lock);
710 	err = 0;
711 
712 out:	mutex_unlock(&u->readlock);
713 	return err;
714 }
715 
/*
 * Look up the socket named by @sunname, returning it with a reference
 * held.
 *
 * Abstract names (leading zero byte) are resolved through the name hash.
 * Filesystem names are resolved through the VFS; they require write
 * permission on the inode, and the inode must be a socket.
 *
 * On failure NULL is returned and *error is set: the VFS or permission
 * error, -ECONNREFUSED when no socket is found, or -EPROTOTYPE when a
 * filesystem socket has a different type.  *error is untouched on
 * success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *other;
	struct inode *inode;
	struct dentry *dentry;
	struct path path;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name space */
		err = -ECONNREFUSED;
		other = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!other)
			goto fail;

		dentry = unix_sk(other)->dentry;
		if (dentry)
			touch_atime(unix_sk(other)->mnt, dentry);

		return other;
	}

	/* Filesystem name */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	other = unix_find_socket_byinode(net, inode);
	if (!other)
		goto put_fail;

	if (other->sk_type == type)
		touch_atime(path.mnt, path.dentry);

	path_put(&path);

	err = -EPROTOTYPE;
	if (other->sk_type != type) {
		sock_put(other);
		goto fail;
	}

	return other;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
770 
771 
772 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773 {
774 	struct sock *sk = sock->sk;
775 	struct net *net = sock_net(sk);
776 	struct unix_sock *u = unix_sk(sk);
777 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
778 	struct dentry *dentry = NULL;
779 	struct nameidata nd;
780 	int err;
781 	unsigned hash;
782 	struct unix_address *addr;
783 	struct hlist_head *list;
784 
785 	err = -EINVAL;
786 	if (sunaddr->sun_family != AF_UNIX)
787 		goto out;
788 
789 	if (addr_len == sizeof(short)) {
790 		err = unix_autobind(sock);
791 		goto out;
792 	}
793 
794 	err = unix_mkname(sunaddr, addr_len, &hash);
795 	if (err < 0)
796 		goto out;
797 	addr_len = err;
798 
799 	mutex_lock(&u->readlock);
800 
801 	err = -EINVAL;
802 	if (u->addr)
803 		goto out_up;
804 
805 	err = -ENOMEM;
806 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807 	if (!addr)
808 		goto out_up;
809 
810 	memcpy(addr->name, sunaddr, addr_len);
811 	addr->len = addr_len;
812 	addr->hash = hash ^ sk->sk_type;
813 	atomic_set(&addr->refcnt, 1);
814 
815 	if (sunaddr->sun_path[0]) {
816 		unsigned int mode;
817 		err = 0;
818 		/*
819 		 * Get the parent directory, calculate the hash for last
820 		 * component.
821 		 */
822 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823 		if (err)
824 			goto out_mknod_parent;
825 
826 		dentry = lookup_create(&nd, 0);
827 		err = PTR_ERR(dentry);
828 		if (IS_ERR(dentry))
829 			goto out_mknod_unlock;
830 
831 		/*
832 		 * All right, let's create it.
833 		 */
834 		mode = S_IFSOCK |
835 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
836 		err = mnt_want_write(nd.path.mnt);
837 		if (err)
838 			goto out_mknod_dput;
839 		err = security_path_mknod(&nd.path, dentry, mode, 0);
840 		if (err)
841 			goto out_mknod_drop_write;
842 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
843 out_mknod_drop_write:
844 		mnt_drop_write(nd.path.mnt);
845 		if (err)
846 			goto out_mknod_dput;
847 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
848 		dput(nd.path.dentry);
849 		nd.path.dentry = dentry;
850 
851 		addr->hash = UNIX_HASH_SIZE;
852 	}
853 
854 	spin_lock(&unix_table_lock);
855 
856 	if (!sunaddr->sun_path[0]) {
857 		err = -EADDRINUSE;
858 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
859 					      sk->sk_type, hash)) {
860 			unix_release_addr(addr);
861 			goto out_unlock;
862 		}
863 
864 		list = &unix_socket_table[addr->hash];
865 	} else {
866 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
867 		u->dentry = nd.path.dentry;
868 		u->mnt    = nd.path.mnt;
869 	}
870 
871 	err = 0;
872 	__unix_remove_socket(sk);
873 	u->addr = addr;
874 	__unix_insert_socket(list, sk);
875 
876 out_unlock:
877 	spin_unlock(&unix_table_lock);
878 out_up:
879 	mutex_unlock(&u->readlock);
880 out:
881 	return err;
882 
883 out_mknod_dput:
884 	dput(dentry);
885 out_mknod_unlock:
886 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
887 	path_put(&nd.path);
888 out_mknod_parent:
889 	if (err == -EEXIST)
890 		err = -EADDRINUSE;
891 	unix_release_addr(addr);
892 	goto out_up;
893 }
894 
/*
 * Take the state locks of @sk1 and @sk2.  The sock at the lower address
 * is locked first.  When @sk2 is NULL or the same sock as @sk1, only
 * @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
909 
/*
 * Release the locks taken by unix_state_double_lock(): always @sk1, and
 * also @sk2 when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
919 
920 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
921 			      int alen, int flags)
922 {
923 	struct sock *sk = sock->sk;
924 	struct net *net = sock_net(sk);
925 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
926 	struct sock *other;
927 	unsigned hash;
928 	int err;
929 
930 	if (addr->sa_family != AF_UNSPEC) {
931 		err = unix_mkname(sunaddr, alen, &hash);
932 		if (err < 0)
933 			goto out;
934 		alen = err;
935 
936 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
937 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
938 			goto out;
939 
940 restart:
941 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
942 		if (!other)
943 			goto out;
944 
945 		unix_state_double_lock(sk, other);
946 
947 		/* Apparently VFS overslept socket death. Retry. */
948 		if (sock_flag(other, SOCK_DEAD)) {
949 			unix_state_double_unlock(sk, other);
950 			sock_put(other);
951 			goto restart;
952 		}
953 
954 		err = -EPERM;
955 		if (!unix_may_send(sk, other))
956 			goto out_unlock;
957 
958 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
959 		if (err)
960 			goto out_unlock;
961 
962 	} else {
963 		/*
964 		 *	1003.1g breaking connected state with AF_UNSPEC
965 		 */
966 		other = NULL;
967 		unix_state_double_lock(sk, other);
968 	}
969 
970 	/*
971 	 * If it was connected, reconnect.
972 	 */
973 	if (unix_peer(sk)) {
974 		struct sock *old_peer = unix_peer(sk);
975 		unix_peer(sk) = other;
976 		unix_state_double_unlock(sk, other);
977 
978 		if (other != old_peer)
979 			unix_dgram_disconnected(sk, old_peer);
980 		sock_put(old_peer);
981 	} else {
982 		unix_peer(sk) = other;
983 		unix_state_double_unlock(sk, other);
984 	}
985 	return 0;
986 
987 out_unlock:
988 	unix_state_double_unlock(sk, other);
989 	sock_put(other);
990 out:
991 	return err;
992 }
993 
994 static long unix_wait_for_peer(struct sock *other, long timeo)
995 {
996 	struct unix_sock *u = unix_sk(other);
997 	int sched;
998 	DEFINE_WAIT(wait);
999 
1000 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001 
1002 	sched = !sock_flag(other, SOCK_DEAD) &&
1003 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1004 		unix_recvq_full(other);
1005 
1006 	unix_state_unlock(other);
1007 
1008 	if (sched)
1009 		timeo = schedule_timeout(timeo);
1010 
1011 	finish_wait(&u->peer_wait, &wait);
1012 	return timeo;
1013 }
1014 
/*
 *	Connect @sock to the listening socket named by @uaddr/@addr_len.
 *
 *	A new sock (newsk), which becomes the accepting side's endpoint, and
 *	a one-byte skb charged to it are allocated before any lock is taken.
 *	On success newsk and @sock's sk are made each other's peer and the
 *	skb is queued on the listener's receive queue; unix_accept() later
 *	recovers newsk through skb->sk.
 *
 *	@flags & O_NONBLOCK is handed to sock_sndtimeo() to choose how long
 *	to wait when the listener's receive queue is full.
 *
 *	Returns 0 on success, otherwise a negative errno:
 *	  - whatever unix_mkname(), unix_autobind(), unix_find_other() or
 *	    security_unix_stream_connect() reported;
 *	  - -ENOMEM       newsk or the skb could not be allocated;
 *	  - -ECONNREFUSED the target is not listening, or its receive side
 *	                  has been shut down;
 *	  - -EAGAIN       the listener's queue is full and the timeout is 0;
 *	  - sock_intr_errno(timeo) when a signal interrupted the wait;
 *	  - -EISCONN      @sock is already TCP_ESTABLISHED;
 *	  - -EINVAL       @sock is in any state other than TCP_CLOSE.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SOCK_PASSCRED on a socket without an address: autobind it first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	/*
	 * A listener whose receive side is shut down will never drain its
	 * backlog, so refuse straight away instead of entering the
	 * wait/restart cycle below, which could otherwise repeat forever.
	 * NOTE(review): unix_wait_for_peer() lives outside this chunk; the
	 * upstream fix "AF_UNIX: Fix deadlock on connecting to shutdown
	 * socket" describes it returning immediately for such a peer.
	 */
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/*
		 * The error paths below go to "out", not "out_unlock", so
		 * unix_wait_for_peer() is expected to return with other's
		 * state lock released.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* Our state changed while we were taking the lock: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	/* newsk's peer pointer owns a reference on sk. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= task_tgid_vnr(current);
	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	/* sk's peer pointer owns a reference on newsk. */
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* Take the queue lock and send the connection request to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* skb, newsk and other may each still be NULL on the early exits. */
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1188 
1189 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1190 {
1191 	struct sock *ska = socka->sk, *skb = sockb->sk;
1192 
1193 	/* Join our sockets back to back */
1194 	sock_hold(ska);
1195 	sock_hold(skb);
1196 	unix_peer(ska) = skb;
1197 	unix_peer(skb) = ska;
1198 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1199 	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1200 	ska->sk_peercred.uid = skb->sk_peercred.uid;
1201 	ska->sk_peercred.gid = skb->sk_peercred.gid;
1202 
1203 	if (ska->sk_type != SOCK_DGRAM) {
1204 		ska->sk_state = TCP_ESTABLISHED;
1205 		skb->sk_state = TCP_ESTABLISHED;
1206 		socka->state  = SS_CONNECTED;
1207 		sockb->state  = SS_CONNECTED;
1208 	}
1209 	return 0;
1210 }
1211 
1212 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1213 {
1214 	struct sock *sk = sock->sk;
1215 	struct sock *tsk;
1216 	struct sk_buff *skb;
1217 	int err;
1218 
1219 	err = -EOPNOTSUPP;
1220 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1221 		goto out;
1222 
1223 	err = -EINVAL;
1224 	if (sk->sk_state != TCP_LISTEN)
1225 		goto out;
1226 
1227 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1228 	 * so that no locks are necessary.
1229 	 */
1230 
1231 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1232 	if (!skb) {
1233 		/* This means receive shutdown. */
1234 		if (err == 0)
1235 			err = -EINVAL;
1236 		goto out;
1237 	}
1238 
1239 	tsk = skb->sk;
1240 	skb_free_datagram(sk, skb);
1241 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1242 
1243 	/* attach accepted sock to socket */
1244 	unix_state_lock(tsk);
1245 	newsock->state = SS_CONNECTED;
1246 	sock_graft(tsk, newsock);
1247 	unix_state_unlock(tsk);
1248 	return 0;
1249 
1250 out:
1251 	return err;
1252 }
1253 
1254 
1255 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1256 {
1257 	struct sock *sk = sock->sk;
1258 	struct unix_sock *u;
1259 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1260 	int err = 0;
1261 
1262 	if (peer) {
1263 		sk = unix_peer_get(sk);
1264 
1265 		err = -ENOTCONN;
1266 		if (!sk)
1267 			goto out;
1268 		err = 0;
1269 	} else {
1270 		sock_hold(sk);
1271 	}
1272 
1273 	u = unix_sk(sk);
1274 	unix_state_lock(sk);
1275 	if (!u->addr) {
1276 		sunaddr->sun_family = AF_UNIX;
1277 		sunaddr->sun_path[0] = 0;
1278 		*uaddr_len = sizeof(short);
1279 	} else {
1280 		struct unix_address *addr = u->addr;
1281 
1282 		*uaddr_len = addr->len;
1283 		memcpy(sunaddr, addr->name, *uaddr_len);
1284 	}
1285 	unix_state_unlock(sk);
1286 	sock_put(sk);
1287 out:
1288 	return err;
1289 }
1290 
1291 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1292 {
1293 	int i;
1294 
1295 	scm->fp = UNIXCB(skb).fp;
1296 	skb->destructor = sock_wfree;
1297 	UNIXCB(skb).fp = NULL;
1298 
1299 	for (i = scm->fp->count-1; i >= 0; i--)
1300 		unix_notinflight(scm->fp->fp[i]);
1301 }
1302 
1303 static void unix_destruct_fds(struct sk_buff *skb)
1304 {
1305 	struct scm_cookie scm;
1306 	memset(&scm, 0, sizeof(scm));
1307 	unix_detach_fds(&scm, skb);
1308 
1309 	/* Alas, it calls VFS */
1310 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1311 	scm_destroy(&scm);
1312 	sock_wfree(skb);
1313 }
1314 
1315 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1316 {
1317 	int i;
1318 
1319 	/*
1320 	 * Need to duplicate file references for the sake of garbage
1321 	 * collection.  Otherwise a socket in the fps might become a
1322 	 * candidate for GC while the skb is not yet queued.
1323 	 */
1324 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1325 	if (!UNIXCB(skb).fp)
1326 		return -ENOMEM;
1327 
1328 	for (i = scm->fp->count-1; i >= 0; i--)
1329 		unix_inflight(scm->fp->fp[i]);
1330 	skb->destructor = unix_destruct_fds;
1331 	return 0;
1332 }
1333 
/*
 *	Send AF_UNIX data.
 *
 *	unix_dgram_sendmsg() sends one message of @len bytes. The receiver
 *	is the address in msg->msg_name when msg->msg_namelen is non-zero,
 *	otherwise the connected peer of @sock.
 *
 *	Returns @len on success, otherwise a negative errno:
 *	  - the error from scm_send(), unix_mkname(), unix_autobind(),
 *	    sock_alloc_send_skb(), unix_attach_fds(), memcpy_fromiovec(),
 *	    unix_find_other() or security_unix_may_send();
 *	  - -EOPNOTSUPP   MSG_OOB was requested;
 *	  - -ENOTCONN     no address given and no peer;
 *	  - -EMSGSIZE     @len exceeds sk_sndbuf - 32;
 *	  - -ECONNRESET   the receiver was lost and there is no address to
 *	                  look it up again;
 *	  - -EPERM        unix_may_send() refused;
 *	  - -ECONNREFUSED our connected peer turned out to be dead;
 *	  - -EPIPE        the receiver has RCV_SHUTDOWN set;
 *	  - -EAGAIN       the receiver's queue is full and the timeout is 0;
 *	  - sock_intr_errno(timeo) when a signal interrupted the wait.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	/* Fall back to an on-stack cookie when the iocb carries none. */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	/* From here on every exit must go through scm_destroy(). */
	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: normalise it; the lookup happens later. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No destination: use the connected peer (takes a reference). */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* SOCK_PASSCRED on a socket without an address: autobind it first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	/* Stamp the sender's credentials and any passed files on the skb. */
	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
	if (siocb->scm->fp) {
		err = unix_attach_fds(siocb->scm, skb);
		if (err)
			goto out_free;
	}
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	/*
	 * other is NULL on the first pass when an address was supplied, and
	 * again after a dead receiver was dropped below.
	 */
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		/* Drop the reference taken by the lookup / unix_peer_get(). */
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			/* The dead sock was our connected peer: disconnect. */
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			/* Presumably the reference the peer pointer held. */
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		/* Not our peer: retry, which redoes the lookup by address. */
		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	/* The security hook is skipped for SOCK_SEQPACKET. */
	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* A receiver whose own peer is us is exempt from the queue limit. */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/*
		 * The paths below skip out_unlock, so unix_wait_for_peer()
		 * is expected to return with other's state lock released.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		/* other is still referenced, so restart re-locks the same sock. */
		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1491 
1492 
/*
 *	Send @len bytes on a connected stream socket.
 *
 *	The data is cut into chunks of at most (sk_sndbuf / 2 - 64) bytes,
 *	further capped by SKB_MAX_ALLOC and by the tailroom of the skb
 *	actually allocated. Each chunk is queued on the peer as its own
 *	skb. Passed files travel only with the first chunk.
 *
 *	Returns the number of bytes queued if that is non-zero, even when a
 *	later chunk failed; otherwise a negative errno:
 *	  - the error from scm_send(), sock_alloc_send_skb(),
 *	    unix_attach_fds() or memcpy_fromiovec();
 *	  - -EOPNOTSUPP   MSG_OOB, or an address given while not
 *	                  TCP_ESTABLISHED;
 *	  - -EISCONN      an address given while TCP_ESTABLISHED;
 *	  - -ENOTCONN     there is no peer;
 *	  - -EPIPE        we have SEND_SHUTDOWN set, or the peer is dead or
 *	                  has RCV_SHUTDOWN set.
 *	On the -EPIPE paths SIGPIPE is sent to the caller if nothing was
 *	sent yet and MSG_NOSIGNAL is not set.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	/* Fall back to an on-stack cookie when the iocb carries none. */
	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* A destination address is never accepted on this path. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		/*
		 * NOTE(review): unlike unix_dgram_sendmsg(), no reference is
		 * taken on the peer here (unix_peer(), not unix_peer_get()).
		 */
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
		/* Only send the fds in the first buffer */
		if (siocb->scm->fp && !fds_sent) {
			err = unix_attach_fds(siocb->scm, skb);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
			fds_sent = true;
		}

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer state is re-checked under its lock for every chunk. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing at all was sent and the caller allows it. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* A partial send reports the byte count rather than the error. */
	return sent ? : err;
}
1611 
1612 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1613 				  struct msghdr *msg, size_t len)
1614 {
1615 	int err;
1616 	struct sock *sk = sock->sk;
1617 
1618 	err = sock_error(sk);
1619 	if (err)
1620 		return err;
1621 
1622 	if (sk->sk_state != TCP_ESTABLISHED)
1623 		return -ENOTCONN;
1624 
1625 	if (msg->msg_namelen)
1626 		msg->msg_namelen = 0;
1627 
1628 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1629 }
1630 
1631 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1632 {
1633 	struct unix_sock *u = unix_sk(sk);
1634 
1635 	msg->msg_namelen = 0;
1636 	if (u->addr) {
1637 		msg->msg_namelen = u->addr->len;
1638 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1639 	}
1640 }
1641 
1642 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1643 			      struct msghdr *msg, size_t size,
1644 			      int flags)
1645 {
1646 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1647 	struct scm_cookie tmp_scm;
1648 	struct sock *sk = sock->sk;
1649 	struct unix_sock *u = unix_sk(sk);
1650 	int noblock = flags & MSG_DONTWAIT;
1651 	struct sk_buff *skb;
1652 	int err;
1653 
1654 	err = -EOPNOTSUPP;
1655 	if (flags&MSG_OOB)
1656 		goto out;
1657 
1658 	msg->msg_namelen = 0;
1659 
1660 	mutex_lock(&u->readlock);
1661 
1662 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1663 	if (!skb) {
1664 		unix_state_lock(sk);
1665 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1666 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1667 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1668 			err = 0;
1669 		unix_state_unlock(sk);
1670 		goto out_unlock;
1671 	}
1672 
1673 	wake_up_interruptible_sync(&u->peer_wait);
1674 
1675 	if (msg->msg_name)
1676 		unix_copy_addr(msg, skb->sk);
1677 
1678 	if (size > skb->len)
1679 		size = skb->len;
1680 	else if (size < skb->len)
1681 		msg->msg_flags |= MSG_TRUNC;
1682 
1683 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1684 	if (err)
1685 		goto out_free;
1686 
1687 	if (!siocb->scm) {
1688 		siocb->scm = &tmp_scm;
1689 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1690 	}
1691 	siocb->scm->creds = *UNIXCREDS(skb);
1692 	unix_set_secdata(siocb->scm, skb);
1693 
1694 	if (!(flags & MSG_PEEK)) {
1695 		if (UNIXCB(skb).fp)
1696 			unix_detach_fds(siocb->scm, skb);
1697 	} else {
1698 		/* It is questionable: on PEEK we could:
1699 		   - do not return fds - good, but too simple 8)
1700 		   - return fds, and do not return them on read (old strategy,
1701 		     apparently wrong)
1702 		   - clone fds (I chose it for now, it is the most universal
1703 		     solution)
1704 
1705 		   POSIX 1003.1g does not actually define this clearly
1706 		   at all. POSIX 1003.1g doesn't define a lot of things
1707 		   clearly however!
1708 
1709 		*/
1710 		if (UNIXCB(skb).fp)
1711 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1712 	}
1713 	err = size;
1714 
1715 	scm_recv(sock, msg, siocb->scm, flags);
1716 
1717 out_free:
1718 	skb_free_datagram(sk, skb);
1719 out_unlock:
1720 	mutex_unlock(&u->readlock);
1721 out:
1722 	return err;
1723 }
1724 
1725 /*
1726  *	Sleep until data has arrive. But check for races..
1727  */
1728 
1729 static long unix_stream_data_wait(struct sock *sk, long timeo)
1730 {
1731 	DEFINE_WAIT(wait);
1732 
1733 	unix_state_lock(sk);
1734 
1735 	for (;;) {
1736 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1737 
1738 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1739 		    sk->sk_err ||
1740 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1741 		    signal_pending(current) ||
1742 		    !timeo)
1743 			break;
1744 
1745 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1746 		unix_state_unlock(sk);
1747 		timeo = schedule_timeout(timeo);
1748 		unix_state_lock(sk);
1749 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1750 	}
1751 
1752 	finish_wait(sk->sk_sleep, &wait);
1753 	unix_state_unlock(sk);
1754 	return timeo;
1755 }
1756 
1757 
1758 
/*
 *	Receive up to @size bytes from a connected stream socket.
 *
 *	skbs are taken off the receive queue one at a time under
 *	u->readlock and copied out until @size is used up, the queue is
 *	empty with at least the low-water target copied, an skb from a
 *	writer with different credentials is met, or an skb carrying files
 *	has been consumed. A partly consumed skb is put back at the head of
 *	the queue. With MSG_PEEK a single skb is examined and put back.
 *
 *	Returns the number of bytes copied if non-zero (or -EFAULT if the
 *	very first copy faulted), otherwise err: -EINVAL when not
 *	TCP_ESTABLISHED, -EOPNOTSUPP for MSG_OOB, the pending socket error,
 *	-EAGAIN when the timeout is 0, sock_intr_errno(timeo) when
 *	interrupted by a signal, or 0 when the receive side is shut down.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	/* target: bytes wanted before returning; timeo: how long to wait. */
	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	/* Fall back to a zeroed on-stack cookie when the iocb has none. */
	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			/* Queue empty: done if the low-water mark is met. */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			/* Shutdown with nothing pending: err is 0 here. */
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* Release readlock while sleeping. */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				/*
				 * readlock is not held here, so jumping past
				 * the mutex_unlock() below is correct; this
				 * path also skips scm_recv().
				 */
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
				   sizeof(siocb->scm->creds)) != 0) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			siocb->scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy faulted: requeue, report -EFAULT only if nothing was copied. */
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop once files have been picked up. */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	/* Any progress (or the -EFAULT stored in copied) wins over err. */
	return copied ? : err;
}
1901 
1902 static int unix_shutdown(struct socket *sock, int mode)
1903 {
1904 	struct sock *sk = sock->sk;
1905 	struct sock *other;
1906 
1907 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1908 
1909 	if (mode) {
1910 		unix_state_lock(sk);
1911 		sk->sk_shutdown |= mode;
1912 		other = unix_peer(sk);
1913 		if (other)
1914 			sock_hold(other);
1915 		unix_state_unlock(sk);
1916 		sk->sk_state_change(sk);
1917 
1918 		if (other &&
1919 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1920 
1921 			int peer_mode = 0;
1922 
1923 			if (mode&RCV_SHUTDOWN)
1924 				peer_mode |= SEND_SHUTDOWN;
1925 			if (mode&SEND_SHUTDOWN)
1926 				peer_mode |= RCV_SHUTDOWN;
1927 			unix_state_lock(other);
1928 			other->sk_shutdown |= peer_mode;
1929 			unix_state_unlock(other);
1930 			other->sk_state_change(other);
1931 			read_lock(&other->sk_callback_lock);
1932 			if (peer_mode == SHUTDOWN_MASK)
1933 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1934 			else if (peer_mode & RCV_SHUTDOWN)
1935 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1936 			read_unlock(&other->sk_callback_lock);
1937 		}
1938 		if (other)
1939 			sock_put(other);
1940 	}
1941 	return 0;
1942 }
1943 
1944 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1945 {
1946 	struct sock *sk = sock->sk;
1947 	long amount = 0;
1948 	int err;
1949 
1950 	switch (cmd) {
1951 	case SIOCOUTQ:
1952 		amount = sk_wmem_alloc_get(sk);
1953 		err = put_user(amount, (int __user *)arg);
1954 		break;
1955 	case SIOCINQ:
1956 		{
1957 			struct sk_buff *skb;
1958 
1959 			if (sk->sk_state == TCP_LISTEN) {
1960 				err = -EINVAL;
1961 				break;
1962 			}
1963 
1964 			spin_lock(&sk->sk_receive_queue.lock);
1965 			if (sk->sk_type == SOCK_STREAM ||
1966 			    sk->sk_type == SOCK_SEQPACKET) {
1967 				skb_queue_walk(&sk->sk_receive_queue, skb)
1968 					amount += skb->len;
1969 			} else {
1970 				skb = skb_peek(&sk->sk_receive_queue);
1971 				if (skb)
1972 					amount = skb->len;
1973 			}
1974 			spin_unlock(&sk->sk_receive_queue.lock);
1975 			err = put_user(amount, (int __user *)arg);
1976 			break;
1977 		}
1978 
1979 	default:
1980 		err = -ENOIOCTLCMD;
1981 		break;
1982 	}
1983 	return err;
1984 }
1985 
1986 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1987 {
1988 	struct sock *sk = sock->sk;
1989 	unsigned int mask;
1990 
1991 	sock_poll_wait(file, sk->sk_sleep, wait);
1992 	mask = 0;
1993 
1994 	/* exceptional events? */
1995 	if (sk->sk_err)
1996 		mask |= POLLERR;
1997 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1998 		mask |= POLLHUP;
1999 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2000 		mask |= POLLRDHUP;
2001 
2002 	/* readable? */
2003 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2004 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2005 		mask |= POLLIN | POLLRDNORM;
2006 
2007 	/* Connection-based need to check for termination and startup */
2008 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2009 	    sk->sk_state == TCP_CLOSE)
2010 		mask |= POLLHUP;
2011 
2012 	/*
2013 	 * we set writable also when the other side has shut down the
2014 	 * connection. This prevents stuck sockets.
2015 	 */
2016 	if (unix_writable(sk))
2017 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2018 
2019 	return mask;
2020 }
2021 
2022 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2023 				    poll_table *wait)
2024 {
2025 	struct sock *sk = sock->sk, *other;
2026 	unsigned int mask, writable;
2027 
2028 	sock_poll_wait(file, sk->sk_sleep, wait);
2029 	mask = 0;
2030 
2031 	/* exceptional events? */
2032 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2033 		mask |= POLLERR;
2034 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2035 		mask |= POLLRDHUP;
2036 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2037 		mask |= POLLHUP;
2038 
2039 	/* readable? */
2040 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2041 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2042 		mask |= POLLIN | POLLRDNORM;
2043 
2044 	/* Connection-based need to check for termination and startup */
2045 	if (sk->sk_type == SOCK_SEQPACKET) {
2046 		if (sk->sk_state == TCP_CLOSE)
2047 			mask |= POLLHUP;
2048 		/* connection hasn't started yet? */
2049 		if (sk->sk_state == TCP_SYN_SENT)
2050 			return mask;
2051 	}
2052 
2053 	/* writable? */
2054 	writable = unix_writable(sk);
2055 	if (writable) {
2056 		other = unix_peer_get(sk);
2057 		if (other) {
2058 			if (unix_peer(other) != sk) {
2059 				sock_poll_wait(file, &unix_sk(other)->peer_wait,
2060 					  wait);
2061 				if (unix_recvq_full(other))
2062 					writable = 0;
2063 			}
2064 
2065 			sock_put(other);
2066 		}
2067 	}
2068 
2069 	if (writable)
2070 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2071 	else
2072 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2073 
2074 	return mask;
2075 }
2076 
2077 #ifdef CONFIG_PROC_FS
2078 static struct sock *first_unix_socket(int *i)
2079 {
2080 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2081 		if (!hlist_empty(&unix_socket_table[*i]))
2082 			return __sk_head(&unix_socket_table[*i]);
2083 	}
2084 	return NULL;
2085 }
2086 
2087 static struct sock *next_unix_socket(int *i, struct sock *s)
2088 {
2089 	struct sock *next = sk_next(s);
2090 	/* More in this chain? */
2091 	if (next)
2092 		return next;
2093 	/* Look for next non-empty chain. */
2094 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2095 		if (!hlist_empty(&unix_socket_table[*i]))
2096 			return __sk_head(&unix_socket_table[*i]);
2097 	}
2098 	return NULL;
2099 }
2100 
/*
 * Per-open iterator state for the "unix" proc file; unix_seq_open() asks
 * seq_open_net() for sizeof(struct unix_iter_state) bytes of private data.
 */
struct unix_iter_state {
	/* NOTE(review): presumably must stay the first member for the seq_file net helpers - confirm. */
	struct seq_net_private p;
	/* Index of the unix_socket_table chain the walk is currently in. */
	int i;
};
2105 
2106 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2107 {
2108 	struct unix_iter_state *iter = seq->private;
2109 	loff_t off = 0;
2110 	struct sock *s;
2111 
2112 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2113 		if (sock_net(s) != seq_file_net(seq))
2114 			continue;
2115 		if (off == pos)
2116 			return s;
2117 		++off;
2118 	}
2119 	return NULL;
2120 }
2121 
2122 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2123 	__acquires(unix_table_lock)
2124 {
2125 	spin_lock(&unix_table_lock);
2126 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2127 }
2128 
2129 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2130 {
2131 	struct unix_iter_state *iter = seq->private;
2132 	struct sock *sk = v;
2133 	++*pos;
2134 
2135 	if (v == SEQ_START_TOKEN)
2136 		sk = first_unix_socket(&iter->i);
2137 	else
2138 		sk = next_unix_socket(&iter->i, sk);
2139 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2140 		sk = next_unix_socket(&iter->i, sk);
2141 	return sk;
2142 }
2143 
/* seq_file ->stop: drop the unix_table_lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2149 
2150 static int unix_seq_show(struct seq_file *seq, void *v)
2151 {
2152 
2153 	if (v == SEQ_START_TOKEN)
2154 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2155 			 "Inode Path\n");
2156 	else {
2157 		struct sock *s = v;
2158 		struct unix_sock *u = unix_sk(s);
2159 		unix_state_lock(s);
2160 
2161 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2162 			s,
2163 			atomic_read(&s->sk_refcnt),
2164 			0,
2165 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2166 			s->sk_type,
2167 			s->sk_socket ?
2168 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2169 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2170 			sock_i_ino(s));
2171 
2172 		if (u->addr) {
2173 			int i, len;
2174 			seq_putc(seq, ' ');
2175 
2176 			i = 0;
2177 			len = u->addr->len - sizeof(short);
2178 			if (!UNIX_ABSTRACT(s))
2179 				len--;
2180 			else {
2181 				seq_putc(seq, '@');
2182 				i++;
2183 			}
2184 			for ( ; i < len; i++)
2185 				seq_putc(seq, u->addr->name->sun_path[i]);
2186 		}
2187 		unix_state_unlock(s);
2188 		seq_putc(seq, '\n');
2189 	}
2190 
2191 	return 0;
2192 }
2193 
/* seq_file iterator callbacks for the "unix" proc file. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,	/* takes unix_table_lock */
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,	/* releases unix_table_lock */
	.show   = unix_seq_show,
};
2200 
/*
 * open() handler for the "unix" proc file: hands unix_seq_ops to
 * seq_open_net(), requesting private data the size of
 * struct unix_iter_state. Returns whatever seq_open_net() returns.
 */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2206 
/* File operations for the "unix" proc file created in unix_net_init(). */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,	/* pairs with seq_open_net() in unix_seq_open() */
};
2214 
2215 #endif
2216 
/* Protocol family descriptor passed to sock_register() in af_unix_init(). */
static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2222 
2223 
2224 static int unix_net_init(struct net *net)
2225 {
2226 	int error = -ENOMEM;
2227 
2228 	net->unx.sysctl_max_dgram_qlen = 10;
2229 	if (unix_sysctl_register(net))
2230 		goto out;
2231 
2232 #ifdef CONFIG_PROC_FS
2233 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2234 		unix_sysctl_unregister(net);
2235 		goto out;
2236 	}
2237 #endif
2238 	error = 0;
2239 out:
2240 	return error;
2241 }
2242 
/*
 * Per-namespace teardown: unregister the sysctls and remove the "unix"
 * proc file set up by unix_net_init().
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2248 
/* Per-namespace hooks passed to register_pernet_subsys() in af_unix_init(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2253 
2254 static int __init af_unix_init(void)
2255 {
2256 	int rc = -1;
2257 	struct sk_buff *dummy_skb;
2258 
2259 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2260 
2261 	rc = proto_register(&unix_proto, 1);
2262 	if (rc != 0) {
2263 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2264 		       __func__);
2265 		goto out;
2266 	}
2267 
2268 	sock_register(&unix_family_ops);
2269 	register_pernet_subsys(&unix_net_ops);
2270 out:
2271 	return rc;
2272 }
2273 
/*
 * Module unload: unregister the PF_UNIX family, the unix proto and the
 * per-namespace operations registered by af_unix_init().
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2280 
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2290