xref: /openbmc/linux/net/unix/af_unix.c (revision 7490ca1e)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123 
124 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
125 
126 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
127 
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security id held in @scm to the slot UNIXSID() gives for @skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Read the security id stored with @skb back into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers do nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
145 
146 /*
147  *  SMP locking strategy:
148  *    hash table is protected with spinlock unix_table_lock
149  *    each socket state is protected by separate spin lock.
150  */
151 
152 static inline unsigned unix_hash_fold(__wsum n)
153 {
154 	unsigned hash = (__force unsigned)n;
155 	hash ^= hash>>16;
156 	hash ^= hash>>8;
157 	return hash&(UNIX_HASH_SIZE-1);
158 }
159 
160 #define unix_peer(sk) (unix_sk(sk)->peer)
161 
162 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
163 {
164 	return unix_peer(osk) == sk;
165 }
166 
/*
 * @sk may send to @osk when @osk has no peer at all, or when its peer
 * is @sk itself.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	if (!unix_peer(osk))
		return 1;

	return unix_our_peer(sk, osk);
}
171 
172 static inline int unix_recvq_full(struct sock const *sk)
173 {
174 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
175 }
176 
177 struct sock *unix_peer_get(struct sock *s)
178 {
179 	struct sock *peer;
180 
181 	unix_state_lock(s);
182 	peer = unix_peer(s);
183 	if (peer)
184 		sock_hold(peer);
185 	unix_state_unlock(s);
186 	return peer;
187 }
188 EXPORT_SYMBOL_GPL(unix_peer_get);
189 
190 static inline void unix_release_addr(struct unix_address *addr)
191 {
192 	if (atomic_dec_and_test(&addr->refcnt))
193 		kfree(addr);
194 }
195 
196 /*
197  *	Check unix socket name:
198  *		- should be not zero length.
199  *	        - if started by not zero, should be NULL terminated (FS object)
200  *		- if started by zero, it is abstract name.
201  */
202 
203 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
204 {
205 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
206 		return -EINVAL;
207 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
208 		return -EINVAL;
209 	if (sunaddr->sun_path[0]) {
210 		/*
211 		 * This may look like an off by one error but it is a bit more
212 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
213 		 * sun_path[108] doesn't as such exist.  However in kernel space
214 		 * we are guaranteed that it is a valid memory location in our
215 		 * kernel address buffer.
216 		 */
217 		((char *)sunaddr)[len] = 0;
218 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
219 		return len;
220 	}
221 
222 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
223 	return len;
224 }
225 
/*
 * Unlink @sk from its hash chain.  No locking is done here; see
 * unix_remove_socket() for the variant that takes unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
230 
/*
 * Link @sk onto the chain @list.  Warns if @sk is still hashed somewhere.
 * No locking is done here.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
236 
237 static inline void unix_remove_socket(struct sock *sk)
238 {
239 	spin_lock(&unix_table_lock);
240 	__unix_remove_socket(sk);
241 	spin_unlock(&unix_table_lock);
242 }
243 
244 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
245 {
246 	spin_lock(&unix_table_lock);
247 	__unix_insert_socket(list, sk);
248 	spin_unlock(&unix_table_lock);
249 }
250 
251 static struct sock *__unix_find_socket_byname(struct net *net,
252 					      struct sockaddr_un *sunname,
253 					      int len, int type, unsigned hash)
254 {
255 	struct sock *s;
256 	struct hlist_node *node;
257 
258 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
259 		struct unix_sock *u = unix_sk(s);
260 
261 		if (!net_eq(sock_net(s), net))
262 			continue;
263 
264 		if (u->addr->len == len &&
265 		    !memcmp(u->addr->name, sunname, len))
266 			goto found;
267 	}
268 	s = NULL;
269 found:
270 	return s;
271 }
272 
273 static inline struct sock *unix_find_socket_byname(struct net *net,
274 						   struct sockaddr_un *sunname,
275 						   int len, int type,
276 						   unsigned hash)
277 {
278 	struct sock *s;
279 
280 	spin_lock(&unix_table_lock);
281 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
282 	if (s)
283 		sock_hold(s);
284 	spin_unlock(&unix_table_lock);
285 	return s;
286 }
287 
288 static struct sock *unix_find_socket_byinode(struct inode *i)
289 {
290 	struct sock *s;
291 	struct hlist_node *node;
292 
293 	spin_lock(&unix_table_lock);
294 	sk_for_each(s, node,
295 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
296 		struct dentry *dentry = unix_sk(s)->dentry;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
314 static void unix_write_space(struct sock *sk)
315 {
316 	struct socket_wq *wq;
317 
318 	rcu_read_lock();
319 	if (unix_writable(sk)) {
320 		wq = rcu_dereference(sk->sk_wq);
321 		if (wq_has_sleeper(wq))
322 			wake_up_interruptible_sync_poll(&wq->wait,
323 				POLLOUT | POLLWRNORM | POLLWRBAND);
324 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
325 	}
326 	rcu_read_unlock();
327 }
328 
329 /* When dgram socket disconnects (or changes its peer), we clear its receive
330  * queue of packets arrived from previous peer. First, it allows to do
331  * flow control based only on wmem_alloc; second, sk connected to peer
332  * may receive messages only from that peer. */
333 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
334 {
335 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
336 		skb_queue_purge(&sk->sk_receive_queue);
337 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
338 
339 		/* If one link of bidirectional dgram pipe is disconnected,
340 		 * we signal error. Messages are lost. Do not make this,
341 		 * when peer was not connected to us.
342 		 */
343 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
344 			other->sk_err = ECONNRESET;
345 			other->sk_error_report(other);
346 		}
347 	}
348 }
349 
350 static void unix_sock_destructor(struct sock *sk)
351 {
352 	struct unix_sock *u = unix_sk(sk);
353 
354 	skb_queue_purge(&sk->sk_receive_queue);
355 
356 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
357 	WARN_ON(!sk_unhashed(sk));
358 	WARN_ON(sk->sk_socket);
359 	if (!sock_flag(sk, SOCK_DEAD)) {
360 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
361 		return;
362 	}
363 
364 	if (u->addr)
365 		unix_release_addr(u->addr);
366 
367 	atomic_long_dec(&unix_nr_socks);
368 	local_bh_disable();
369 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
370 	local_bh_enable();
371 #ifdef UNIX_REFCNT_DEBUG
372 	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
373 		atomic_long_read(&unix_nr_socks));
374 #endif
375 }
376 
377 static int unix_release_sock(struct sock *sk, int embrion)
378 {
379 	struct unix_sock *u = unix_sk(sk);
380 	struct dentry *dentry;
381 	struct vfsmount *mnt;
382 	struct sock *skpair;
383 	struct sk_buff *skb;
384 	int state;
385 
386 	unix_remove_socket(sk);
387 
388 	/* Clear state */
389 	unix_state_lock(sk);
390 	sock_orphan(sk);
391 	sk->sk_shutdown = SHUTDOWN_MASK;
392 	dentry	     = u->dentry;
393 	u->dentry    = NULL;
394 	mnt	     = u->mnt;
395 	u->mnt	     = NULL;
396 	state = sk->sk_state;
397 	sk->sk_state = TCP_CLOSE;
398 	unix_state_unlock(sk);
399 
400 	wake_up_interruptible_all(&u->peer_wait);
401 
402 	skpair = unix_peer(sk);
403 
404 	if (skpair != NULL) {
405 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
406 			unix_state_lock(skpair);
407 			/* No more writes */
408 			skpair->sk_shutdown = SHUTDOWN_MASK;
409 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
410 				skpair->sk_err = ECONNRESET;
411 			unix_state_unlock(skpair);
412 			skpair->sk_state_change(skpair);
413 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
414 		}
415 		sock_put(skpair); /* It may now die */
416 		unix_peer(sk) = NULL;
417 	}
418 
419 	/* Try to flush out this socket. Throw out buffers at least */
420 
421 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
422 		if (state == TCP_LISTEN)
423 			unix_release_sock(skb->sk, 1);
424 		/* passed fds are erased in the kfree_skb hook	      */
425 		kfree_skb(skb);
426 	}
427 
428 	if (dentry) {
429 		dput(dentry);
430 		mntput(mnt);
431 	}
432 
433 	sock_put(sk);
434 
435 	/* ---- Socket is dead now and most probably destroyed ---- */
436 
437 	/*
438 	 * Fixme: BSD difference: In BSD all sockets connected to use get
439 	 *	  ECONNRESET and we die on the spot. In Linux we behave
440 	 *	  like files and pipes do and wait for the last
441 	 *	  dereference.
442 	 *
443 	 * Can't we simply set sock->err?
444 	 *
445 	 *	  What the above comment does talk about? --ANK(980817)
446 	 */
447 
448 	if (unix_tot_inflight)
449 		unix_gc();		/* Garbage collect fds */
450 
451 	return 0;
452 }
453 
454 static void init_peercred(struct sock *sk)
455 {
456 	put_pid(sk->sk_peer_pid);
457 	if (sk->sk_peer_cred)
458 		put_cred(sk->sk_peer_cred);
459 	sk->sk_peer_pid  = get_pid(task_tgid(current));
460 	sk->sk_peer_cred = get_current_cred();
461 }
462 
463 static void copy_peercred(struct sock *sk, struct sock *peersk)
464 {
465 	put_pid(sk->sk_peer_pid);
466 	if (sk->sk_peer_cred)
467 		put_cred(sk->sk_peer_cred);
468 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
469 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
470 }
471 
472 static int unix_listen(struct socket *sock, int backlog)
473 {
474 	int err;
475 	struct sock *sk = sock->sk;
476 	struct unix_sock *u = unix_sk(sk);
477 	struct pid *old_pid = NULL;
478 	const struct cred *old_cred = NULL;
479 
480 	err = -EOPNOTSUPP;
481 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
482 		goto out;	/* Only stream/seqpacket sockets accept */
483 	err = -EINVAL;
484 	if (!u->addr)
485 		goto out;	/* No listens on an unbound socket */
486 	unix_state_lock(sk);
487 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
488 		goto out_unlock;
489 	if (backlog > sk->sk_max_ack_backlog)
490 		wake_up_interruptible_all(&u->peer_wait);
491 	sk->sk_max_ack_backlog	= backlog;
492 	sk->sk_state		= TCP_LISTEN;
493 	/* set credentials so connect can copy them */
494 	init_peercred(sk);
495 	err = 0;
496 
497 out_unlock:
498 	unix_state_unlock(sk);
499 	put_pid(old_pid);
500 	if (old_cred)
501 		put_cred(old_cred);
502 out:
503 	return err;
504 }
505 
506 static int unix_release(struct socket *);
507 static int unix_bind(struct socket *, struct sockaddr *, int);
508 static int unix_stream_connect(struct socket *, struct sockaddr *,
509 			       int addr_len, int flags);
510 static int unix_socketpair(struct socket *, struct socket *);
511 static int unix_accept(struct socket *, struct socket *, int);
512 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
513 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
514 static unsigned int unix_dgram_poll(struct file *, struct socket *,
515 				    poll_table *);
516 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
517 static int unix_shutdown(struct socket *, int);
518 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
519 			       struct msghdr *, size_t);
520 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
521 			       struct msghdr *, size_t, int);
522 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
523 			      struct msghdr *, size_t);
524 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
525 			      struct msghdr *, size_t, int);
526 static int unix_dgram_connect(struct socket *, struct sockaddr *,
527 			      int, int);
528 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
529 				  struct msghdr *, size_t);
530 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
531 				  struct msghdr *, size_t, int);
532 
533 static const struct proto_ops unix_stream_ops = {
534 	.family =	PF_UNIX,
535 	.owner =	THIS_MODULE,
536 	.release =	unix_release,
537 	.bind =		unix_bind,
538 	.connect =	unix_stream_connect,
539 	.socketpair =	unix_socketpair,
540 	.accept =	unix_accept,
541 	.getname =	unix_getname,
542 	.poll =		unix_poll,
543 	.ioctl =	unix_ioctl,
544 	.listen =	unix_listen,
545 	.shutdown =	unix_shutdown,
546 	.setsockopt =	sock_no_setsockopt,
547 	.getsockopt =	sock_no_getsockopt,
548 	.sendmsg =	unix_stream_sendmsg,
549 	.recvmsg =	unix_stream_recvmsg,
550 	.mmap =		sock_no_mmap,
551 	.sendpage =	sock_no_sendpage,
552 };
553 
554 static const struct proto_ops unix_dgram_ops = {
555 	.family =	PF_UNIX,
556 	.owner =	THIS_MODULE,
557 	.release =	unix_release,
558 	.bind =		unix_bind,
559 	.connect =	unix_dgram_connect,
560 	.socketpair =	unix_socketpair,
561 	.accept =	sock_no_accept,
562 	.getname =	unix_getname,
563 	.poll =		unix_dgram_poll,
564 	.ioctl =	unix_ioctl,
565 	.listen =	sock_no_listen,
566 	.shutdown =	unix_shutdown,
567 	.setsockopt =	sock_no_setsockopt,
568 	.getsockopt =	sock_no_getsockopt,
569 	.sendmsg =	unix_dgram_sendmsg,
570 	.recvmsg =	unix_dgram_recvmsg,
571 	.mmap =		sock_no_mmap,
572 	.sendpage =	sock_no_sendpage,
573 };
574 
575 static const struct proto_ops unix_seqpacket_ops = {
576 	.family =	PF_UNIX,
577 	.owner =	THIS_MODULE,
578 	.release =	unix_release,
579 	.bind =		unix_bind,
580 	.connect =	unix_stream_connect,
581 	.socketpair =	unix_socketpair,
582 	.accept =	unix_accept,
583 	.getname =	unix_getname,
584 	.poll =		unix_dgram_poll,
585 	.ioctl =	unix_ioctl,
586 	.listen =	unix_listen,
587 	.shutdown =	unix_shutdown,
588 	.setsockopt =	sock_no_setsockopt,
589 	.getsockopt =	sock_no_getsockopt,
590 	.sendmsg =	unix_seqpacket_sendmsg,
591 	.recvmsg =	unix_seqpacket_recvmsg,
592 	.mmap =		sock_no_mmap,
593 	.sendpage =	sock_no_sendpage,
594 };
595 
596 static struct proto unix_proto = {
597 	.name			= "UNIX",
598 	.owner			= THIS_MODULE,
599 	.obj_size		= sizeof(struct unix_sock),
600 };
601 
602 /*
603  * AF_UNIX sockets do not interact with hardware, hence they
604  * dont trigger interrupts - so it's safe for them to have
605  * bh-unsafe locking for their sk_receive_queue.lock. Split off
606  * this special lock-class by reinitializing the spinlock key:
607  */
608 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
609 
610 static struct sock *unix_create1(struct net *net, struct socket *sock)
611 {
612 	struct sock *sk = NULL;
613 	struct unix_sock *u;
614 
615 	atomic_long_inc(&unix_nr_socks);
616 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
617 		goto out;
618 
619 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
620 	if (!sk)
621 		goto out;
622 
623 	sock_init_data(sock, sk);
624 	lockdep_set_class(&sk->sk_receive_queue.lock,
625 				&af_unix_sk_receive_queue_lock_key);
626 
627 	sk->sk_write_space	= unix_write_space;
628 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
629 	sk->sk_destruct		= unix_sock_destructor;
630 	u	  = unix_sk(sk);
631 	u->dentry = NULL;
632 	u->mnt	  = NULL;
633 	spin_lock_init(&u->lock);
634 	atomic_long_set(&u->inflight, 0);
635 	INIT_LIST_HEAD(&u->link);
636 	mutex_init(&u->readlock); /* single task reading lock */
637 	init_waitqueue_head(&u->peer_wait);
638 	unix_insert_socket(unix_sockets_unbound, sk);
639 out:
640 	if (sk == NULL)
641 		atomic_long_dec(&unix_nr_socks);
642 	else {
643 		local_bh_disable();
644 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
645 		local_bh_enable();
646 	}
647 	return sk;
648 }
649 
650 static int unix_create(struct net *net, struct socket *sock, int protocol,
651 		       int kern)
652 {
653 	if (protocol && protocol != PF_UNIX)
654 		return -EPROTONOSUPPORT;
655 
656 	sock->state = SS_UNCONNECTED;
657 
658 	switch (sock->type) {
659 	case SOCK_STREAM:
660 		sock->ops = &unix_stream_ops;
661 		break;
662 		/*
663 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
664 		 *	nothing uses it.
665 		 */
666 	case SOCK_RAW:
667 		sock->type = SOCK_DGRAM;
668 	case SOCK_DGRAM:
669 		sock->ops = &unix_dgram_ops;
670 		break;
671 	case SOCK_SEQPACKET:
672 		sock->ops = &unix_seqpacket_ops;
673 		break;
674 	default:
675 		return -ESOCKTNOSUPPORT;
676 	}
677 
678 	return unix_create1(net, sock) ? 0 : -ENOMEM;
679 }
680 
681 static int unix_release(struct socket *sock)
682 {
683 	struct sock *sk = sock->sk;
684 
685 	if (!sk)
686 		return 0;
687 
688 	sock->sk = NULL;
689 
690 	return unix_release_sock(sk, 0);
691 }
692 
693 static int unix_autobind(struct socket *sock)
694 {
695 	struct sock *sk = sock->sk;
696 	struct net *net = sock_net(sk);
697 	struct unix_sock *u = unix_sk(sk);
698 	static u32 ordernum = 1;
699 	struct unix_address *addr;
700 	int err;
701 	unsigned int retries = 0;
702 
703 	mutex_lock(&u->readlock);
704 
705 	err = 0;
706 	if (u->addr)
707 		goto out;
708 
709 	err = -ENOMEM;
710 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
711 	if (!addr)
712 		goto out;
713 
714 	addr->name->sun_family = AF_UNIX;
715 	atomic_set(&addr->refcnt, 1);
716 
717 retry:
718 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
719 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
720 
721 	spin_lock(&unix_table_lock);
722 	ordernum = (ordernum+1)&0xFFFFF;
723 
724 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
725 				      addr->hash)) {
726 		spin_unlock(&unix_table_lock);
727 		/*
728 		 * __unix_find_socket_byname() may take long time if many names
729 		 * are already in use.
730 		 */
731 		cond_resched();
732 		/* Give up if all names seems to be in use. */
733 		if (retries++ == 0xFFFFF) {
734 			err = -ENOSPC;
735 			kfree(addr);
736 			goto out;
737 		}
738 		goto retry;
739 	}
740 	addr->hash ^= sk->sk_type;
741 
742 	__unix_remove_socket(sk);
743 	u->addr = addr;
744 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
745 	spin_unlock(&unix_table_lock);
746 	err = 0;
747 
748 out:	mutex_unlock(&u->readlock);
749 	return err;
750 }
751 
/*
 * Find the socket a name refers to.
 *
 * Filesystem names (first path byte non-zero) are resolved through the
 * VFS: the caller needs write permission on the inode, which must be a
 * socket that is bound in the table and has type @type.  Abstract names
 * are looked up directly in the table.
 *
 * On success the socket is returned with a reference held.  On failure
 * NULL is returned and the error code is stored in *@error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct inode *inode;
	struct sock *found;
	struct path path;
	int err = 0;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		/* Abstract name. */
		err = -ECONNREFUSED;
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found)
			goto fail;

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	/* Filesystem name. */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = path.dentry->d_inode;
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(inode);
	if (!found)
		goto put_fail;

	/* The access time is only touched for a socket of the right type. */
	if (found->sk_type == type)
		touch_atime(path.mnt, path.dentry);

	path_put(&path);

	err = -EPROTOTYPE;
	if (found->sk_type != type) {
		sock_put(found);
		goto fail;
	}
	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
806 
807 
808 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
809 {
810 	struct sock *sk = sock->sk;
811 	struct net *net = sock_net(sk);
812 	struct unix_sock *u = unix_sk(sk);
813 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
814 	char *sun_path = sunaddr->sun_path;
815 	struct dentry *dentry = NULL;
816 	struct path path;
817 	int err;
818 	unsigned hash;
819 	struct unix_address *addr;
820 	struct hlist_head *list;
821 
822 	err = -EINVAL;
823 	if (sunaddr->sun_family != AF_UNIX)
824 		goto out;
825 
826 	if (addr_len == sizeof(short)) {
827 		err = unix_autobind(sock);
828 		goto out;
829 	}
830 
831 	err = unix_mkname(sunaddr, addr_len, &hash);
832 	if (err < 0)
833 		goto out;
834 	addr_len = err;
835 
836 	mutex_lock(&u->readlock);
837 
838 	err = -EINVAL;
839 	if (u->addr)
840 		goto out_up;
841 
842 	err = -ENOMEM;
843 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
844 	if (!addr)
845 		goto out_up;
846 
847 	memcpy(addr->name, sunaddr, addr_len);
848 	addr->len = addr_len;
849 	addr->hash = hash ^ sk->sk_type;
850 	atomic_set(&addr->refcnt, 1);
851 
852 	if (sun_path[0]) {
853 		umode_t mode;
854 		err = 0;
855 		/*
856 		 * Get the parent directory, calculate the hash for last
857 		 * component.
858 		 */
859 		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
860 		err = PTR_ERR(dentry);
861 		if (IS_ERR(dentry))
862 			goto out_mknod_parent;
863 
864 		/*
865 		 * All right, let's create it.
866 		 */
867 		mode = S_IFSOCK |
868 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
869 		err = mnt_want_write(path.mnt);
870 		if (err)
871 			goto out_mknod_dput;
872 		err = security_path_mknod(&path, dentry, mode, 0);
873 		if (err)
874 			goto out_mknod_drop_write;
875 		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
876 out_mknod_drop_write:
877 		mnt_drop_write(path.mnt);
878 		if (err)
879 			goto out_mknod_dput;
880 		mutex_unlock(&path.dentry->d_inode->i_mutex);
881 		dput(path.dentry);
882 		path.dentry = dentry;
883 
884 		addr->hash = UNIX_HASH_SIZE;
885 	}
886 
887 	spin_lock(&unix_table_lock);
888 
889 	if (!sun_path[0]) {
890 		err = -EADDRINUSE;
891 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
892 					      sk->sk_type, hash)) {
893 			unix_release_addr(addr);
894 			goto out_unlock;
895 		}
896 
897 		list = &unix_socket_table[addr->hash];
898 	} else {
899 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
900 		u->dentry = path.dentry;
901 		u->mnt    = path.mnt;
902 	}
903 
904 	err = 0;
905 	__unix_remove_socket(sk);
906 	u->addr = addr;
907 	__unix_insert_socket(list, sk);
908 
909 out_unlock:
910 	spin_unlock(&unix_table_lock);
911 out_up:
912 	mutex_unlock(&u->readlock);
913 out:
914 	return err;
915 
916 out_mknod_dput:
917 	dput(dentry);
918 	mutex_unlock(&path.dentry->d_inode->i_mutex);
919 	path_put(&path);
920 out_mknod_parent:
921 	if (err == -EEXIST)
922 		err = -EADDRINUSE;
923 	unix_release_addr(addr);
924 	goto out_up;
925 }
926 
/*
 * Take the state locks of @sk1 and @sk2.  The sock at the lower address
 * is always locked first, the other one with the nested variant.  When
 * @sk2 is NULL or the same sock as @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
941 
/*
 * Counterpart of unix_state_double_lock(): releases @sk1 and, unless it is
 * NULL or the same sock, @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
951 
952 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
953 			      int alen, int flags)
954 {
955 	struct sock *sk = sock->sk;
956 	struct net *net = sock_net(sk);
957 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
958 	struct sock *other;
959 	unsigned hash;
960 	int err;
961 
962 	if (addr->sa_family != AF_UNSPEC) {
963 		err = unix_mkname(sunaddr, alen, &hash);
964 		if (err < 0)
965 			goto out;
966 		alen = err;
967 
968 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
969 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
970 			goto out;
971 
972 restart:
973 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
974 		if (!other)
975 			goto out;
976 
977 		unix_state_double_lock(sk, other);
978 
979 		/* Apparently VFS overslept socket death. Retry. */
980 		if (sock_flag(other, SOCK_DEAD)) {
981 			unix_state_double_unlock(sk, other);
982 			sock_put(other);
983 			goto restart;
984 		}
985 
986 		err = -EPERM;
987 		if (!unix_may_send(sk, other))
988 			goto out_unlock;
989 
990 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
991 		if (err)
992 			goto out_unlock;
993 
994 	} else {
995 		/*
996 		 *	1003.1g breaking connected state with AF_UNSPEC
997 		 */
998 		other = NULL;
999 		unix_state_double_lock(sk, other);
1000 	}
1001 
1002 	/*
1003 	 * If it was connected, reconnect.
1004 	 */
1005 	if (unix_peer(sk)) {
1006 		struct sock *old_peer = unix_peer(sk);
1007 		unix_peer(sk) = other;
1008 		unix_state_double_unlock(sk, other);
1009 
1010 		if (other != old_peer)
1011 			unix_dgram_disconnected(sk, old_peer);
1012 		sock_put(old_peer);
1013 	} else {
1014 		unix_peer(sk) = other;
1015 		unix_state_double_unlock(sk, other);
1016 	}
1017 	return 0;
1018 
1019 out_unlock:
1020 	unix_state_double_unlock(sk, other);
1021 	sock_put(other);
1022 out:
1023 	return err;
1024 }
1025 
1026 static long unix_wait_for_peer(struct sock *other, long timeo)
1027 {
1028 	struct unix_sock *u = unix_sk(other);
1029 	int sched;
1030 	DEFINE_WAIT(wait);
1031 
1032 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1033 
1034 	sched = !sock_flag(other, SOCK_DEAD) &&
1035 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1036 		unix_recvq_full(other);
1037 
1038 	unix_state_unlock(other);
1039 
1040 	if (sched)
1041 		timeo = schedule_timeout(timeo);
1042 
1043 	finish_wait(&u->peer_wait, &wait);
1044 	return timeo;
1045 }
1046 
/*
 * connect() for connection-oriented AF_UNIX sockets.
 *
 * A new sock (newsk) and a 1-byte skb allocated against it with
 * sock_wmalloc() are created up front.  The listener named by @uaddr is
 * looked up, and once both state locks are held and the security hook has
 * agreed, sk and newsk are linked as each other's peer and the skb is
 * queued on the listener's receive queue (unix_accept() later picks the
 * new sock up from skb->sk).
 *
 * If the listener's receive queue is full the caller sleeps in
 * unix_wait_for_peer() and retries, unless the send timeout is zero.
 *
 * Returns 0 on success or a negative errno: an error from unix_mkname(),
 * unix_autobind(), unix_find_other() or the security hook, -ENOMEM,
 * -ECONNREFUSED, -EAGAIN, -EISCONN, -EINVAL, or sock_intr_errno() when a
 * signal is pending after the wait.
 */
1047 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1048 			       int addr_len, int flags)
1049 {
1050 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1051 	struct sock *sk = sock->sk;
1052 	struct net *net = sock_net(sk);
1053 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1054 	struct sock *newsk = NULL;
1055 	struct sock *other = NULL;
1056 	struct sk_buff *skb = NULL;
1057 	unsigned hash;
1058 	int st;
1059 	int err;
1060 	long timeo;
1061 
1062 	err = unix_mkname(sunaddr, addr_len, &hash);
1063 	if (err < 0)
1064 		goto out;
1065 	addr_len = err;
1066 
	/* A SOCK_PASSCRED socket without an address is autobound first. */
1067 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1068 	    (err = unix_autobind(sock)) != 0)
1069 		goto out;
1070 
1071 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1072 
1073 	/* First of all allocate resources.
1074 	   If we will make it after state is locked,
1075 	   we will have to recheck all again in any case.
1076 	 */
1077 
1078 	err = -ENOMEM;
1079 
1080 	/* create new sock for complete connection */
1081 	newsk = unix_create1(sock_net(sk), NULL);
1082 	if (newsk == NULL)
1083 		goto out;
1084 
1085 	/* Allocate skb for sending to listening sock */
1086 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1087 	if (skb == NULL)
1088 		goto out;
1089 
1090 restart:
1091 	/*  Find listening sock. */
1092 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1093 	if (!other)
1094 		goto out;
1095 
1096 	/* Latch state of peer */
1097 	unix_state_lock(other);
1098 
1099 	/* Apparently VFS overslept socket death. Retry. */
1100 	if (sock_flag(other, SOCK_DEAD)) {
1101 		unix_state_unlock(other);
1102 		sock_put(other);
1103 		goto restart;
1104 	}
1105 
1106 	err = -ECONNREFUSED;
1107 	if (other->sk_state != TCP_LISTEN)
1108 		goto out_unlock;
1109 	if (other->sk_shutdown & RCV_SHUTDOWN)
1110 		goto out_unlock;
1111 
1112 	if (unix_recvq_full(other)) {
1113 		err = -EAGAIN;
1114 		if (!timeo)
1115 			goto out_unlock;
1116 
		/* unix_wait_for_peer() drops other's state lock. */
1117 		timeo = unix_wait_for_peer(other, timeo);
1118 
1119 		err = sock_intr_errno(timeo);
1120 		if (signal_pending(current))
1121 			goto out;
1122 		sock_put(other);
1123 		goto restart;
1124 	}
1125 
1126 	/* Latch our state.
1127 
1128 	   It is tricky place. We need to grab our state lock and cannot
1129 	   drop lock on peer. It is dangerous because deadlock is
1130 	   possible. Connect to self case and simultaneous
1131 	   attempt to connect are eliminated by checking socket
1132 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1133 	   check this before attempt to grab lock.
1134 
1135 	   Well, and we have to recheck the state after socket locked.
1136 	 */
1137 	st = sk->sk_state;
1138 
1139 	switch (st) {
1140 	case TCP_CLOSE:
1141 		/* This is ok... continue with connect */
1142 		break;
1143 	case TCP_ESTABLISHED:
1144 		/* Socket is already connected */
1145 		err = -EISCONN;
1146 		goto out_unlock;
1147 	default:
1148 		err = -EINVAL;
1149 		goto out_unlock;
1150 	}
1151 
1152 	unix_state_lock_nested(sk);
1153 
	/* Our state changed between the unlocked read and taking the lock: start over. */
1154 	if (sk->sk_state != st) {
1155 		unix_state_unlock(sk);
1156 		unix_state_unlock(other);
1157 		sock_put(other);
1158 		goto restart;
1159 	}
1160 
1161 	err = security_unix_stream_connect(sk, other, newsk);
1162 	if (err) {
1163 		unix_state_unlock(sk);
1164 		goto out_unlock;
1165 	}
1166 
1167 	/* The way is open! Fastly set all the necessary fields... */
1168 
	/* Reference on sk held through newsk's peer pointer. */
1169 	sock_hold(sk);
1170 	unix_peer(newsk)	= sk;
1171 	newsk->sk_state		= TCP_ESTABLISHED;
1172 	newsk->sk_type		= sk->sk_type;
1173 	init_peercred(newsk);
1174 	newu = unix_sk(newsk);
1175 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1176 	otheru = unix_sk(other);
1177 
1178 	/* copy address information from listening to new sock*/
1179 	if (otheru->addr) {
1180 		atomic_inc(&otheru->addr->refcnt);
1181 		newu->addr = otheru->addr;
1182 	}
1183 	if (otheru->dentry) {
1184 		newu->dentry	= dget(otheru->dentry);
1185 		newu->mnt	= mntget(otheru->mnt);
1186 	}
1187 
1188 	/* Set credentials */
1189 	copy_peercred(sk, other);
1190 
1191 	sock->state	= SS_CONNECTED;
1192 	sk->sk_state	= TCP_ESTABLISHED;
	/* Reference on newsk held through sk's peer pointer. */
1193 	sock_hold(newsk);
1194 
1195 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1196 	unix_peer(sk)	= newsk;
1197 
1198 	unix_state_unlock(sk);
1199 
1200 	/* take ten and send info to listening sock */
1201 	spin_lock(&other->sk_receive_queue.lock);
1202 	__skb_queue_tail(&other->sk_receive_queue, skb);
1203 	spin_unlock(&other->sk_receive_queue.lock);
1204 	unix_state_unlock(other);
1205 	other->sk_data_ready(other, 0);
1206 	sock_put(other);
1207 	return 0;
1208 
1209 out_unlock:
1210 	if (other)
1211 		unix_state_unlock(other);
1212 
	/* Common failure exit: free the skb and new sock, drop the listener reference. */
1213 out:
1214 	kfree_skb(skb);
1215 	if (newsk)
1216 		unix_release_sock(newsk, 0);
1217 	if (other)
1218 		sock_put(other);
1219 	return err;
1220 }
1221 
1222 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1223 {
1224 	struct sock *ska = socka->sk, *skb = sockb->sk;
1225 
1226 	/* Join our sockets back to back */
1227 	sock_hold(ska);
1228 	sock_hold(skb);
1229 	unix_peer(ska) = skb;
1230 	unix_peer(skb) = ska;
1231 	init_peercred(ska);
1232 	init_peercred(skb);
1233 
1234 	if (ska->sk_type != SOCK_DGRAM) {
1235 		ska->sk_state = TCP_ESTABLISHED;
1236 		skb->sk_state = TCP_ESTABLISHED;
1237 		socka->state  = SS_CONNECTED;
1238 		sockb->state  = SS_CONNECTED;
1239 	}
1240 	return 0;
1241 }
1242 
1243 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1244 {
1245 	struct sock *sk = sock->sk;
1246 	struct sock *tsk;
1247 	struct sk_buff *skb;
1248 	int err;
1249 
1250 	err = -EOPNOTSUPP;
1251 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1252 		goto out;
1253 
1254 	err = -EINVAL;
1255 	if (sk->sk_state != TCP_LISTEN)
1256 		goto out;
1257 
1258 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1259 	 * so that no locks are necessary.
1260 	 */
1261 
1262 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1263 	if (!skb) {
1264 		/* This means receive shutdown. */
1265 		if (err == 0)
1266 			err = -EINVAL;
1267 		goto out;
1268 	}
1269 
1270 	tsk = skb->sk;
1271 	skb_free_datagram(sk, skb);
1272 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1273 
1274 	/* attach accepted sock to socket */
1275 	unix_state_lock(tsk);
1276 	newsock->state = SS_CONNECTED;
1277 	sock_graft(tsk, newsock);
1278 	unix_state_unlock(tsk);
1279 	return 0;
1280 
1281 out:
1282 	return err;
1283 }
1284 
1285 
1286 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1287 {
1288 	struct sock *sk = sock->sk;
1289 	struct unix_sock *u;
1290 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1291 	int err = 0;
1292 
1293 	if (peer) {
1294 		sk = unix_peer_get(sk);
1295 
1296 		err = -ENOTCONN;
1297 		if (!sk)
1298 			goto out;
1299 		err = 0;
1300 	} else {
1301 		sock_hold(sk);
1302 	}
1303 
1304 	u = unix_sk(sk);
1305 	unix_state_lock(sk);
1306 	if (!u->addr) {
1307 		sunaddr->sun_family = AF_UNIX;
1308 		sunaddr->sun_path[0] = 0;
1309 		*uaddr_len = sizeof(short);
1310 	} else {
1311 		struct unix_address *addr = u->addr;
1312 
1313 		*uaddr_len = addr->len;
1314 		memcpy(sunaddr, addr->name, *uaddr_len);
1315 	}
1316 	unix_state_unlock(sk);
1317 	sock_put(sk);
1318 out:
1319 	return err;
1320 }
1321 
1322 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1323 {
1324 	int i;
1325 
1326 	scm->fp = UNIXCB(skb).fp;
1327 	UNIXCB(skb).fp = NULL;
1328 
1329 	for (i = scm->fp->count-1; i >= 0; i--)
1330 		unix_notinflight(scm->fp->fp[i]);
1331 }
1332 
1333 static void unix_destruct_scm(struct sk_buff *skb)
1334 {
1335 	struct scm_cookie scm;
1336 	memset(&scm, 0, sizeof(scm));
1337 	scm.pid  = UNIXCB(skb).pid;
1338 	scm.cred = UNIXCB(skb).cred;
1339 	if (UNIXCB(skb).fp)
1340 		unix_detach_fds(&scm, skb);
1341 
1342 	/* Alas, it calls VFS */
1343 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1344 	scm_destroy(&scm);
1345 	sock_wfree(skb);
1346 }
1347 
1348 #define MAX_RECURSION_LEVEL 4
1349 
1350 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1351 {
1352 	int i;
1353 	unsigned char max_level = 0;
1354 	int unix_sock_count = 0;
1355 
1356 	for (i = scm->fp->count - 1; i >= 0; i--) {
1357 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1358 
1359 		if (sk) {
1360 			unix_sock_count++;
1361 			max_level = max(max_level,
1362 					unix_sk(sk)->recursion_level);
1363 		}
1364 	}
1365 	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1366 		return -ETOOMANYREFS;
1367 
1368 	/*
1369 	 * Need to duplicate file references for the sake of garbage
1370 	 * collection.  Otherwise a socket in the fps might become a
1371 	 * candidate for GC while the skb is not yet queued.
1372 	 */
1373 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1374 	if (!UNIXCB(skb).fp)
1375 		return -ENOMEM;
1376 
1377 	if (unix_sock_count) {
1378 		for (i = scm->fp->count - 1; i >= 0; i--)
1379 			unix_inflight(scm->fp->fp[i]);
1380 	}
1381 	return max_level;
1382 }
1383 
1384 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1385 {
1386 	int err = 0;
1387 
1388 	UNIXCB(skb).pid  = get_pid(scm->pid);
1389 	if (scm->cred)
1390 		UNIXCB(skb).cred = get_cred(scm->cred);
1391 	UNIXCB(skb).fp = NULL;
1392 	if (scm->fp && send_fds)
1393 		err = unix_attach_fds(scm, skb);
1394 
1395 	skb->destructor = unix_destruct_scm;
1396 	return err;
1397 }
1398 
1399 /*
1400  * Some apps rely on write() giving SCM_CREDENTIALS
1401  * We include credentials if source or destination socket
1402  * asserted SOCK_PASSCRED.
1403  */
1404 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1405 			    const struct sock *other)
1406 {
1407 	if (UNIXCB(skb).cred)
1408 		return;
1409 	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1410 	    !other->sk_socket ||
1411 	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1412 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1413 		UNIXCB(skb).cred = get_current_cred();
1414 	}
1415 }
1416 
1417 /*
1418  *	Send AF_UNIX data.
1419  */
1420 
/*
 * sendmsg() for datagram sockets; unix_seqpacket_sendmsg() also ends up
 * here.
 *
 * The destination is msg->msg_name when msg->msg_namelen is non-zero,
 * otherwise the connected peer.  The whole message is copied into one skb
 * and ancillary data gathered by scm_send() is attached to it through
 * unix_scm_to_skb().
 *
 * Returns @len on success (also when the receiver's socket filter rejects
 * the packet, which is then silently dropped) or a negative errno:
 * -EOPNOTSUPP for MSG_OOB, -ENOTCONN, -EMSGSIZE, -ECONNRESET,
 * -ECONNREFUSED, -EPERM, -EPIPE, -EAGAIN, or whatever the helpers called
 * on the way report.
 */
1421 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1422 			      struct msghdr *msg, size_t len)
1423 {
1424 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1425 	struct sock *sk = sock->sk;
1426 	struct net *net = sock_net(sk);
1427 	struct unix_sock *u = unix_sk(sk);
1428 	struct sockaddr_un *sunaddr = msg->msg_name;
1429 	struct sock *other = NULL;
1430 	int namelen = 0; /* fake GCC */
1431 	int err;
1432 	unsigned hash;
1433 	struct sk_buff *skb;
1434 	long timeo;
1435 	struct scm_cookie tmp_scm;
1436 	int max_level;
1437 
1438 	if (NULL == siocb->scm)
1439 		siocb->scm = &tmp_scm;
1440 	wait_for_unix_gc();
1441 	err = scm_send(sock, msg, siocb->scm);
1442 	if (err < 0)
1443 		return err;
1444 
1445 	err = -EOPNOTSUPP;
1446 	if (msg->msg_flags&MSG_OOB)
1447 		goto out;
1448 
1449 	if (msg->msg_namelen) {
1450 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1451 		if (err < 0)
1452 			goto out;
1453 		namelen = err;
1454 	} else {
		/* No address given: use the connected peer (reference taken). */
1455 		sunaddr = NULL;
1456 		err = -ENOTCONN;
1457 		other = unix_peer_get(sk);
1458 		if (!other)
1459 			goto out;
1460 	}
1461 
1462 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1463 	    && (err = unix_autobind(sock)) != 0)
1464 		goto out;
1465 
1466 	err = -EMSGSIZE;
1467 	if (len > sk->sk_sndbuf - 32)
1468 		goto out;
1469 
1470 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1471 	if (skb == NULL)
1472 		goto out;
1473 
1474 	err = unix_scm_to_skb(siocb->scm, skb, true);
1475 	if (err < 0)
1476 		goto out_free;
	/* Level recorded on the receiver below: one more than the deepest passed socket. */
1477 	max_level = err + 1;
1478 	unix_get_secdata(siocb->scm, skb);
1479 
1480 	skb_reset_transport_header(skb);
1481 	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1482 	if (err)
1483 		goto out_free;
1484 
1485 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1486 
1487 restart:
1488 	if (!other) {
1489 		err = -ECONNRESET;
1490 		if (sunaddr == NULL)
1491 			goto out_free;
1492 
1493 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1494 					hash, &err);
1495 		if (other == NULL)
1496 			goto out_free;
1497 	}
1498 
1499 	if (sk_filter(other, skb) < 0) {
1500 		/* Toss the packet but do not return any error to the sender */
1501 		err = len;
1502 		goto out_free;
1503 	}
1504 
1505 	unix_state_lock(other);
1506 	err = -EPERM;
1507 	if (!unix_may_send(sk, other))
1508 		goto out_unlock;
1509 
1510 	if (sock_flag(other, SOCK_DEAD)) {
1511 		/*
1512 		 *	Check with 1003.1g - what should
1513 		 *	datagram error
1514 		 */
1515 		unix_state_unlock(other);
1516 		sock_put(other);
1517 
		/*
		 * If the dead sock was our connected peer, disconnect from it
		 * and fail with -ECONNREFUSED; otherwise look the address up
		 * again.
		 */
1518 		err = 0;
1519 		unix_state_lock(sk);
1520 		if (unix_peer(sk) == other) {
1521 			unix_peer(sk) = NULL;
1522 			unix_state_unlock(sk);
1523 
1524 			unix_dgram_disconnected(sk, other);
1525 			sock_put(other);
1526 			err = -ECONNREFUSED;
1527 		} else {
1528 			unix_state_unlock(sk);
1529 		}
1530 
1531 		other = NULL;
1532 		if (err)
1533 			goto out_free;
1534 		goto restart;
1535 	}
1536 
1537 	err = -EPIPE;
1538 	if (other->sk_shutdown & RCV_SHUTDOWN)
1539 		goto out_unlock;
1540 
1541 	if (sk->sk_type != SOCK_SEQPACKET) {
1542 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1543 		if (err)
1544 			goto out_unlock;
1545 	}
1546 
	/* The queue limit is not enforced when the receiver is connected back to us. */
1547 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1548 		if (!timeo) {
1549 			err = -EAGAIN;
1550 			goto out_unlock;
1551 		}
1552 
		/* unix_wait_for_peer() drops other's state lock. */
1553 		timeo = unix_wait_for_peer(other, timeo);
1554 
1555 		err = sock_intr_errno(timeo);
1556 		if (signal_pending(current))
1557 			goto out_free;
1558 
1559 		goto restart;
1560 	}
1561 
1562 	if (sock_flag(other, SOCK_RCVTSTAMP))
1563 		__net_timestamp(skb);
1564 	maybe_add_creds(skb, sock, other);
1565 	skb_queue_tail(&other->sk_receive_queue, skb);
1566 	if (max_level > unix_sk(other)->recursion_level)
1567 		unix_sk(other)->recursion_level = max_level;
1568 	unix_state_unlock(other);
1569 	other->sk_data_ready(other, len);
1570 	sock_put(other);
1571 	scm_destroy(siocb->scm);
1572 	return len;
1573 
1574 out_unlock:
1575 	unix_state_unlock(other);
1576 out_free:
1577 	kfree_skb(skb);
1578 out:
1579 	if (other)
1580 		sock_put(other);
1581 	scm_destroy(siocb->scm);
1582 	return err;
1583 }
1584 
1585 
/*
 * sendmsg() for SOCK_STREAM sockets.
 *
 * The payload is split into skbs of at most (sk_sndbuf / 2 - 64) bytes,
 * further capped by SKB_MAX_ALLOC and the tailroom of the allocated skb,
 * and each skb is queued on the peer's receive queue.  Passed file
 * descriptors are attached to the first skb only.
 *
 * Returns the number of bytes queued when at least one byte was sent,
 * otherwise a negative errno (-EOPNOTSUPP, -EISCONN, -ENOTCONN, -EPIPE or
 * a helper's error).  On the -EPIPE path SIGPIPE is sent when nothing has
 * been sent yet and MSG_NOSIGNAL is not set.
 */
1586 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1587 			       struct msghdr *msg, size_t len)
1588 {
1589 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1590 	struct sock *sk = sock->sk;
1591 	struct sock *other = NULL;
1592 	int err, size;
1593 	struct sk_buff *skb;
1594 	int sent = 0;
1595 	struct scm_cookie tmp_scm;
1596 	bool fds_sent = false;
1597 	int max_level;
1598 
1599 	if (NULL == siocb->scm)
1600 		siocb->scm = &tmp_scm;
1601 	wait_for_unix_gc();
1602 	err = scm_send(sock, msg, siocb->scm);
1603 	if (err < 0)
1604 		return err;
1605 
1606 	err = -EOPNOTSUPP;
1607 	if (msg->msg_flags&MSG_OOB)
1608 		goto out_err;
1609 
	/* A destination address is never accepted on a stream socket. */
1610 	if (msg->msg_namelen) {
1611 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1612 		goto out_err;
1613 	} else {
1614 		err = -ENOTCONN;
1615 		other = unix_peer(sk);
1616 		if (!other)
1617 			goto out_err;
1618 	}
1619 
1620 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1621 		goto pipe_err;
1622 
1623 	while (sent < len) {
1624 		/*
1625 		 *	Optimisation for the fact that under 0.01% of X
1626 		 *	messages typically need breaking up.
1627 		 */
1628 
1629 		size = len-sent;
1630 
1631 		/* Keep two messages in the pipe so it schedules better */
1632 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1633 			size = (sk->sk_sndbuf >> 1) - 64;
1634 
1635 		if (size > SKB_MAX_ALLOC)
1636 			size = SKB_MAX_ALLOC;
1637 
1638 		/*
1639 		 *	Grab a buffer
1640 		 */
1641 
1642 		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1643 					  &err);
1644 
1645 		if (skb == NULL)
1646 			goto out_err;
1647 
1648 		/*
1649 		 *	If you pass two values to the sock_alloc_send_skb
1650 		 *	it tries to grab the large buffer with GFP_NOFS
1651 		 *	(which can fail easily), and if it fails grab the
1652 		 *	fallback size buffer which is under a page and will
1653 		 *	succeed. [Alan]
1654 		 */
1655 		size = min_t(int, size, skb_tailroom(skb));
1656 
1657 
1658 		/* Only send the fds in the first buffer */
1659 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1660 		if (err < 0) {
1661 			kfree_skb(skb);
1662 			goto out_err;
1663 		}
1664 		max_level = err + 1;
1665 		fds_sent = true;
1666 
1667 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1668 		if (err) {
1669 			kfree_skb(skb);
1670 			goto out_err;
1671 		}
1672 
1673 		unix_state_lock(other);
1674 
		/* Peer gone or no longer receiving: -EPIPE. */
1675 		if (sock_flag(other, SOCK_DEAD) ||
1676 		    (other->sk_shutdown & RCV_SHUTDOWN))
1677 			goto pipe_err_free;
1678 
1679 		maybe_add_creds(skb, sock, other);
1680 		skb_queue_tail(&other->sk_receive_queue, skb);
1681 		if (max_level > unix_sk(other)->recursion_level)
1682 			unix_sk(other)->recursion_level = max_level;
1683 		unix_state_unlock(other);
1684 		other->sk_data_ready(other, size);
1685 		sent += size;
1686 	}
1687 
1688 	scm_destroy(siocb->scm);
1689 	siocb->scm = NULL;
1690 
1691 	return sent;
1692 
1693 pipe_err_free:
1694 	unix_state_unlock(other);
1695 	kfree_skb(skb);
1696 pipe_err:
1697 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1698 		send_sig(SIGPIPE, current, 0);
1699 	err = -EPIPE;
1700 out_err:
1701 	scm_destroy(siocb->scm);
1702 	siocb->scm = NULL;
	/* A partial send reports the byte count rather than the error. */
1703 	return sent ? : err;
1704 }
1705 
1706 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1707 				  struct msghdr *msg, size_t len)
1708 {
1709 	int err;
1710 	struct sock *sk = sock->sk;
1711 
1712 	err = sock_error(sk);
1713 	if (err)
1714 		return err;
1715 
1716 	if (sk->sk_state != TCP_ESTABLISHED)
1717 		return -ENOTCONN;
1718 
1719 	if (msg->msg_namelen)
1720 		msg->msg_namelen = 0;
1721 
1722 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1723 }
1724 
1725 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1726 			      struct msghdr *msg, size_t size,
1727 			      int flags)
1728 {
1729 	struct sock *sk = sock->sk;
1730 
1731 	if (sk->sk_state != TCP_ESTABLISHED)
1732 		return -ENOTCONN;
1733 
1734 	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1735 }
1736 
1737 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1738 {
1739 	struct unix_sock *u = unix_sk(sk);
1740 
1741 	msg->msg_namelen = 0;
1742 	if (u->addr) {
1743 		msg->msg_namelen = u->addr->len;
1744 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1745 	}
1746 }
1747 
/*
 * recvmsg() for datagram sockets; unix_seqpacket_recvmsg() also ends up
 * here.
 *
 * Takes one skb off the receive queue under u->readlock, copies at most
 * @size bytes to the user iovec (setting MSG_TRUNC in msg->msg_flags when
 * the datagram is longer) and hands credentials, security data and passed
 * file descriptors to scm_recv().
 *
 * Returns the number of bytes copied, 0 for a non-blocking SEQPACKET
 * socket that has RCV_SHUTDOWN set and nothing queued, or a negative
 * errno (-EOPNOTSUPP for MSG_OOB, sock_intr_errno() when taking the mutex
 * is interrupted, or the error from skb_recv_datagram() /
 * skb_copy_datagram_iovec()).
 */
1748 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1749 			      struct msghdr *msg, size_t size,
1750 			      int flags)
1751 {
1752 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1753 	struct scm_cookie tmp_scm;
1754 	struct sock *sk = sock->sk;
1755 	struct unix_sock *u = unix_sk(sk);
1756 	int noblock = flags & MSG_DONTWAIT;
1757 	struct sk_buff *skb;
1758 	int err;
1759 
1760 	err = -EOPNOTSUPP;
1761 	if (flags&MSG_OOB)
1762 		goto out;
1763 
1764 	msg->msg_namelen = 0;
1765 
1766 	err = mutex_lock_interruptible(&u->readlock);
1767 	if (err) {
1768 		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1769 		goto out;
1770 	}
1771 
1772 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1773 	if (!skb) {
1774 		unix_state_lock(sk);
1775 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1776 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1777 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1778 			err = 0;
1779 		unix_state_unlock(sk);
1780 		goto out_unlock;
1781 	}
1782 
	/* Wake tasks waiting on our peer_wait queue for room to write. */
1783 	wake_up_interruptible_sync_poll(&u->peer_wait,
1784 					POLLOUT | POLLWRNORM | POLLWRBAND);
1785 
1786 	if (msg->msg_name)
1787 		unix_copy_addr(msg, skb->sk);
1788 
1789 	if (size > skb->len)
1790 		size = skb->len;
1791 	else if (size < skb->len)
1792 		msg->msg_flags |= MSG_TRUNC;
1793 
1794 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1795 	if (err)
1796 		goto out_free;
1797 
1798 	if (sock_flag(sk, SOCK_RCVTSTAMP))
1799 		__sock_recv_timestamp(msg, sk, skb);
1800 
1801 	if (!siocb->scm) {
1802 		siocb->scm = &tmp_scm;
1803 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1804 	}
1805 	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1806 	unix_set_secdata(siocb->scm, skb);
1807 
1808 	if (!(flags & MSG_PEEK)) {
1809 		if (UNIXCB(skb).fp)
1810 			unix_detach_fds(siocb->scm, skb);
1811 	} else {
1812 		/* It is questionable: on PEEK we could:
1813 		   - do not return fds - good, but too simple 8)
1814 		   - return fds, and do not return them on read (old strategy,
1815 		     apparently wrong)
1816 		   - clone fds (I chose it for now, it is the most universal
1817 		     solution)
1818 
1819 		   POSIX 1003.1g does not actually define this clearly
1820 		   at all. POSIX 1003.1g doesn't define a lot of things
1821 		   clearly however!
1822 
1823 		*/
1824 		if (UNIXCB(skb).fp)
1825 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1826 	}
1827 	err = size;
1828 
1829 	scm_recv(sock, msg, siocb->scm, flags);
1830 
1831 out_free:
1832 	skb_free_datagram(sk, skb);
1833 out_unlock:
1834 	mutex_unlock(&u->readlock);
1835 out:
1836 	return err;
1837 }
1838 
1839 /*
1840  *	Sleep until data has arrived. But check for races..
1841  */
1842 
/*
 * Sleep on the socket's wait queue until the receive queue is non-empty,
 * sk_err is set, RCV_SHUTDOWN is set, a signal is pending, or the timeout
 * has run out.  The conditions are tested with the state lock held; the
 * lock is dropped around schedule_timeout().
 *
 * Returns the timeout that remains.
 */
1843 static long unix_stream_data_wait(struct sock *sk, long timeo)
1844 {
1845 	DEFINE_WAIT(wait);
1846 
1847 	unix_state_lock(sk);
1848 
1849 	for (;;) {
1850 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1851 
1852 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1853 		    sk->sk_err ||
1854 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1855 		    signal_pending(current) ||
1856 		    !timeo)
1857 			break;
1858 
		/* SOCK_ASYNC_WAITDATA is set only for the duration of the sleep. */
1859 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1860 		unix_state_unlock(sk);
1861 		timeo = schedule_timeout(timeo);
1862 		unix_state_lock(sk);
1863 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1864 	}
1865 
1866 	finish_wait(sk_sleep(sk), &wait);
1867 	unix_state_unlock(sk);
1868 	return timeo;
1869 }
1870 
1871 
1872 
1873 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1874 			       struct msghdr *msg, size_t size,
1875 			       int flags)
1876 {
1877 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1878 	struct scm_cookie tmp_scm;
1879 	struct sock *sk = sock->sk;
1880 	struct unix_sock *u = unix_sk(sk);
1881 	struct sockaddr_un *sunaddr = msg->msg_name;
1882 	int copied = 0;
1883 	int check_creds = 0;
1884 	int target;
1885 	int err = 0;
1886 	long timeo;
1887 
1888 	err = -EINVAL;
1889 	if (sk->sk_state != TCP_ESTABLISHED)
1890 		goto out;
1891 
1892 	err = -EOPNOTSUPP;
1893 	if (flags&MSG_OOB)
1894 		goto out;
1895 
1896 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1897 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1898 
1899 	msg->msg_namelen = 0;
1900 
1901 	/* Lock the socket to prevent queue disordering
1902 	 * while sleeps in memcpy_tomsg
1903 	 */
1904 
1905 	if (!siocb->scm) {
1906 		siocb->scm = &tmp_scm;
1907 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1908 	}
1909 
1910 	err = mutex_lock_interruptible(&u->readlock);
1911 	if (err) {
1912 		err = sock_intr_errno(timeo);
1913 		goto out;
1914 	}
1915 
1916 	do {
1917 		int chunk;
1918 		struct sk_buff *skb;
1919 
1920 		unix_state_lock(sk);
1921 		skb = skb_peek(&sk->sk_receive_queue);
1922 		if (skb == NULL) {
1923 			unix_sk(sk)->recursion_level = 0;
1924 			if (copied >= target)
1925 				goto unlock;
1926 
1927 			/*
1928 			 *	POSIX 1003.1g mandates this order.
1929 			 */
1930 
1931 			err = sock_error(sk);
1932 			if (err)
1933 				goto unlock;
1934 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1935 				goto unlock;
1936 
1937 			unix_state_unlock(sk);
1938 			err = -EAGAIN;
1939 			if (!timeo)
1940 				break;
1941 			mutex_unlock(&u->readlock);
1942 
1943 			timeo = unix_stream_data_wait(sk, timeo);
1944 
1945 			if (signal_pending(current)
1946 			    ||  mutex_lock_interruptible(&u->readlock)) {
1947 				err = sock_intr_errno(timeo);
1948 				goto out;
1949 			}
1950 
1951 			continue;
1952  unlock:
1953 			unix_state_unlock(sk);
1954 			break;
1955 		}
1956 		unix_state_unlock(sk);
1957 
1958 		if (check_creds) {
1959 			/* Never glue messages from different writers */
1960 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
1961 			    (UNIXCB(skb).cred != siocb->scm->cred))
1962 				break;
1963 		} else {
1964 			/* Copy credentials */
1965 			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1966 			check_creds = 1;
1967 		}
1968 
1969 		/* Copy address just once */
1970 		if (sunaddr) {
1971 			unix_copy_addr(msg, skb->sk);
1972 			sunaddr = NULL;
1973 		}
1974 
1975 		chunk = min_t(unsigned int, skb->len, size);
1976 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1977 			if (copied == 0)
1978 				copied = -EFAULT;
1979 			break;
1980 		}
1981 		copied += chunk;
1982 		size -= chunk;
1983 
1984 		/* Mark read part of skb as used */
1985 		if (!(flags & MSG_PEEK)) {
1986 			skb_pull(skb, chunk);
1987 
1988 			if (UNIXCB(skb).fp)
1989 				unix_detach_fds(siocb->scm, skb);
1990 
1991 			if (skb->len)
1992 				break;
1993 
1994 			skb_unlink(skb, &sk->sk_receive_queue);
1995 			consume_skb(skb);
1996 
1997 			if (siocb->scm->fp)
1998 				break;
1999 		} else {
2000 			/* It is questionable, see note in unix_dgram_recvmsg.
2001 			 */
2002 			if (UNIXCB(skb).fp)
2003 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2004 
2005 			break;
2006 		}
2007 	} while (size);
2008 
2009 	mutex_unlock(&u->readlock);
2010 	scm_recv(sock, msg, siocb->scm, flags);
2011 out:
2012 	return copied ? : err;
2013 }
2014 
2015 static int unix_shutdown(struct socket *sock, int mode)
2016 {
2017 	struct sock *sk = sock->sk;
2018 	struct sock *other;
2019 
2020 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2021 
2022 	if (!mode)
2023 		return 0;
2024 
2025 	unix_state_lock(sk);
2026 	sk->sk_shutdown |= mode;
2027 	other = unix_peer(sk);
2028 	if (other)
2029 		sock_hold(other);
2030 	unix_state_unlock(sk);
2031 	sk->sk_state_change(sk);
2032 
2033 	if (other &&
2034 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2035 
2036 		int peer_mode = 0;
2037 
2038 		if (mode&RCV_SHUTDOWN)
2039 			peer_mode |= SEND_SHUTDOWN;
2040 		if (mode&SEND_SHUTDOWN)
2041 			peer_mode |= RCV_SHUTDOWN;
2042 		unix_state_lock(other);
2043 		other->sk_shutdown |= peer_mode;
2044 		unix_state_unlock(other);
2045 		other->sk_state_change(other);
2046 		if (peer_mode == SHUTDOWN_MASK)
2047 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2048 		else if (peer_mode & RCV_SHUTDOWN)
2049 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2050 	}
2051 	if (other)
2052 		sock_put(other);
2053 
2054 	return 0;
2055 }
2056 
2057 long unix_inq_len(struct sock *sk)
2058 {
2059 	struct sk_buff *skb;
2060 	long amount = 0;
2061 
2062 	if (sk->sk_state == TCP_LISTEN)
2063 		return -EINVAL;
2064 
2065 	spin_lock(&sk->sk_receive_queue.lock);
2066 	if (sk->sk_type == SOCK_STREAM ||
2067 	    sk->sk_type == SOCK_SEQPACKET) {
2068 		skb_queue_walk(&sk->sk_receive_queue, skb)
2069 			amount += skb->len;
2070 	} else {
2071 		skb = skb_peek(&sk->sk_receive_queue);
2072 		if (skb)
2073 			amount = skb->len;
2074 	}
2075 	spin_unlock(&sk->sk_receive_queue.lock);
2076 
2077 	return amount;
2078 }
2079 EXPORT_SYMBOL_GPL(unix_inq_len);
2080 
/* Report the value of sk_wmem_alloc_get() for @sk (used for SIOCOUTQ). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2086 
2087 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2088 {
2089 	struct sock *sk = sock->sk;
2090 	long amount = 0;
2091 	int err;
2092 
2093 	switch (cmd) {
2094 	case SIOCOUTQ:
2095 		amount = unix_outq_len(sk);
2096 		err = put_user(amount, (int __user *)arg);
2097 		break;
2098 	case SIOCINQ:
2099 		amount = unix_inq_len(sk);
2100 		if (amount < 0)
2101 			err = amount;
2102 		else
2103 			err = put_user(amount, (int __user *)arg);
2104 		break;
2105 	default:
2106 		err = -ENOIOCTLCMD;
2107 		break;
2108 	}
2109 	return err;
2110 }
2111 
2112 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2113 {
2114 	struct sock *sk = sock->sk;
2115 	unsigned int mask;
2116 
2117 	sock_poll_wait(file, sk_sleep(sk), wait);
2118 	mask = 0;
2119 
2120 	/* exceptional events? */
2121 	if (sk->sk_err)
2122 		mask |= POLLERR;
2123 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2124 		mask |= POLLHUP;
2125 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2126 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2127 
2128 	/* readable? */
2129 	if (!skb_queue_empty(&sk->sk_receive_queue))
2130 		mask |= POLLIN | POLLRDNORM;
2131 
2132 	/* Connection-based need to check for termination and startup */
2133 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2134 	    sk->sk_state == TCP_CLOSE)
2135 		mask |= POLLHUP;
2136 
2137 	/*
2138 	 * we set writable also when the other side has shut down the
2139 	 * connection. This prevents stuck sockets.
2140 	 */
2141 	if (unix_writable(sk))
2142 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2143 
2144 	return mask;
2145 }
2146 
/*
 * poll() handler that also takes the connected peer into account.
 *
 * Besides the socket's own state, writability depends on the peer: when
 * there is a peer that is not connected back to us, the caller is also
 * registered on the peer's peer_wait queue and the socket is reported as
 * not writable while the peer's receive queue is full.  When the socket
 * is not writable SOCK_ASYNC_NOSPACE is set on it.
 *
 * Returns the POLL* event mask.
 */
2147 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2148 				    poll_table *wait)
2149 {
2150 	struct sock *sk = sock->sk, *other;
2151 	unsigned int mask, writable;
2152 
2153 	sock_poll_wait(file, sk_sleep(sk), wait);
2154 	mask = 0;
2155 
2156 	/* exceptional events? */
2157 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2158 		mask |= POLLERR;
2159 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2160 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2161 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2162 		mask |= POLLHUP;
2163 
2164 	/* readable? */
2165 	if (!skb_queue_empty(&sk->sk_receive_queue))
2166 		mask |= POLLIN | POLLRDNORM;
2167 
2168 	/* Connection-based need to check for termination and startup */
2169 	if (sk->sk_type == SOCK_SEQPACKET) {
2170 		if (sk->sk_state == TCP_CLOSE)
2171 			mask |= POLLHUP;
2172 		/* connection hasn't started yet? */
2173 		if (sk->sk_state == TCP_SYN_SENT)
2174 			return mask;
2175 	}
2176 
2177 	/* No write status requested, avoid expensive OUT tests. */
2178 	if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2179 		return mask;
2180 
2181 	writable = unix_writable(sk);
	/* unix_peer_get() returns the peer with a reference held, or NULL. */
2182 	other = unix_peer_get(sk);
2183 	if (other) {
2184 		if (unix_peer(other) != sk) {
2185 			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2186 			if (unix_recvq_full(other))
2187 				writable = 0;
2188 		}
2189 		sock_put(other);
2190 	}
2191 
2192 	if (writable)
2193 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2194 	else
2195 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2196 
2197 	return mask;
2198 }
2199 
2200 #ifdef CONFIG_PROC_FS
2201 static struct sock *first_unix_socket(int *i)
2202 {
2203 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2204 		if (!hlist_empty(&unix_socket_table[*i]))
2205 			return __sk_head(&unix_socket_table[*i]);
2206 	}
2207 	return NULL;
2208 }
2209 
2210 static struct sock *next_unix_socket(int *i, struct sock *s)
2211 {
2212 	struct sock *next = sk_next(s);
2213 	/* More in this chain? */
2214 	if (next)
2215 		return next;
2216 	/* Look for next non-empty chain. */
2217 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2218 		if (!hlist_empty(&unix_socket_table[*i]))
2219 			return __sk_head(&unix_socket_table[*i]);
2220 	}
2221 	return NULL;
2222 }
2223 
/* Per-open iterator state for the seq_file listing of AF_UNIX sockets. */
2224 struct unix_iter_state {
2225 	struct seq_net_private p;	/* NOTE(review): presumably must stay first for seq_open_net() - confirm */
2226 	int i;	/* index of the unix_socket_table chain being walked */
2227 };
2228 
2229 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2230 {
2231 	struct unix_iter_state *iter = seq->private;
2232 	loff_t off = 0;
2233 	struct sock *s;
2234 
2235 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2236 		if (sock_net(s) != seq_file_net(seq))
2237 			continue;
2238 		if (off == pos)
2239 			return s;
2240 		++off;
2241 	}
2242 	return NULL;
2243 }
2244 
2245 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2246 	__acquires(unix_table_lock)
2247 {
2248 	spin_lock(&unix_table_lock);
2249 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2250 }
2251 
2252 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2253 {
2254 	struct unix_iter_state *iter = seq->private;
2255 	struct sock *sk = v;
2256 	++*pos;
2257 
2258 	if (v == SEQ_START_TOKEN)
2259 		sk = first_unix_socket(&iter->i);
2260 	else
2261 		sk = next_unix_socket(&iter->i, sk);
2262 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2263 		sk = next_unix_socket(&iter->i, sk);
2264 	return sk;
2265 }
2266 
/* seq_file ->stop: release unix_table_lock taken in unix_seq_start(). */
2267 static void unix_seq_stop(struct seq_file *seq, void *v)
2268 	__releases(unix_table_lock)
2269 {
2270 	spin_unlock(&unix_table_lock);
2271 }
2272 
2273 static int unix_seq_show(struct seq_file *seq, void *v)
2274 {
2275 
2276 	if (v == SEQ_START_TOKEN)
2277 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2278 			 "Inode Path\n");
2279 	else {
2280 		struct sock *s = v;
2281 		struct unix_sock *u = unix_sk(s);
2282 		unix_state_lock(s);
2283 
2284 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2285 			s,
2286 			atomic_read(&s->sk_refcnt),
2287 			0,
2288 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2289 			s->sk_type,
2290 			s->sk_socket ?
2291 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2292 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2293 			sock_i_ino(s));
2294 
2295 		if (u->addr) {
2296 			int i, len;
2297 			seq_putc(seq, ' ');
2298 
2299 			i = 0;
2300 			len = u->addr->len - sizeof(short);
2301 			if (!UNIX_ABSTRACT(s))
2302 				len--;
2303 			else {
2304 				seq_putc(seq, '@');
2305 				i++;
2306 			}
2307 			for ( ; i < len; i++)
2308 				seq_putc(seq, u->addr->name->sun_path[i]);
2309 		}
2310 		unix_state_unlock(s);
2311 		seq_putc(seq, '\n');
2312 	}
2313 
2314 	return 0;
2315 }
2316 
2317 static const struct seq_operations unix_seq_ops = {
2318 	.start  = unix_seq_start,
2319 	.next   = unix_seq_next,
2320 	.stop   = unix_seq_stop,
2321 	.show   = unix_seq_show,
2322 };
2323 
2324 static int unix_seq_open(struct inode *inode, struct file *file)
2325 {
2326 	return seq_open_net(inode, file, &unix_seq_ops,
2327 			    sizeof(struct unix_iter_state));
2328 }
2329 
2330 static const struct file_operations unix_seq_fops = {
2331 	.owner		= THIS_MODULE,
2332 	.open		= unix_seq_open,
2333 	.read		= seq_read,
2334 	.llseek		= seq_lseek,
2335 	.release	= seq_release_net,
2336 };
2337 
2338 #endif
2339 
2340 static const struct net_proto_family unix_family_ops = {
2341 	.family = PF_UNIX,
2342 	.create = unix_create,
2343 	.owner	= THIS_MODULE,
2344 };
2345 
2346 
2347 static int __net_init unix_net_init(struct net *net)
2348 {
2349 	int error = -ENOMEM;
2350 
2351 	net->unx.sysctl_max_dgram_qlen = 10;
2352 	if (unix_sysctl_register(net))
2353 		goto out;
2354 
2355 #ifdef CONFIG_PROC_FS
2356 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2357 		unix_sysctl_unregister(net);
2358 		goto out;
2359 	}
2360 #endif
2361 	error = 0;
2362 out:
2363 	return error;
2364 }
2365 
2366 static void __net_exit unix_net_exit(struct net *net)
2367 {
2368 	unix_sysctl_unregister(net);
2369 	proc_net_remove(net, "unix");
2370 }
2371 
2372 static struct pernet_operations unix_net_ops = {
2373 	.init = unix_net_init,
2374 	.exit = unix_net_exit,
2375 };
2376 
2377 static int __init af_unix_init(void)
2378 {
2379 	int rc = -1;
2380 	struct sk_buff *dummy_skb;
2381 
2382 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2383 
2384 	rc = proto_register(&unix_proto, 1);
2385 	if (rc != 0) {
2386 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2387 		       __func__);
2388 		goto out;
2389 	}
2390 
2391 	sock_register(&unix_family_ops);
2392 	register_pernet_subsys(&unix_net_ops);
2393 out:
2394 	return rc;
2395 }
2396 
2397 static void __exit af_unix_exit(void)
2398 {
2399 	sock_unregister(PF_UNIX);
2400 	proto_unregister(&unix_proto);
2401 	unregister_pernet_subsys(&unix_net_ops);
2402 }
2403 
2404 /* Earlier than device_initcall() so that other drivers invoking
2405    request_module() don't end up in a loop when modprobe tries
2406    to use a UNIX socket. But later than subsys_initcall() because
2407    we depend on stuff initialised there */
2408 fs_initcall(af_unix_init);
2409 module_exit(af_unix_exit);
2410 
2411 MODULE_LICENSE("GPL");
2412 MODULE_ALIAS_NETPROTO(PF_UNIX);
2413