xref: /openbmc/linux/net/unix/af_unix.c (revision e7065e20)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid a huge amount
38  *					of hashed socks (this is for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
59  *		mark and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns a 0-length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  starting with a zero byte, so that this name space does not
80  *		  intersect with BSD names.
81  */
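
/*
 * Editor's illustration - a minimal, hedged userspace sketch of the two
 * binding styles described above (filesystem vs. abstract); the path
 * "/tmp/example.sock" and the abstract name "example" are arbitrary:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fs_fd  = socket(AF_UNIX, SOCK_STREAM, 0);
 *	int abs_fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	strcpy(a.sun_path, "/tmp/example.sock");	(NUL-terminated path)
 *	bind(fs_fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	a.sun_path[0] = 0;				(leading zero byte)
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(abs_fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 *
 * Only the first "addr_len" bytes count as the abstract name, which is
 * why the exact length is passed instead of sizeof(a).
 */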
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123 
124 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
125 
126 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
127 
128 #ifdef CONFIG_SECURITY_NETWORK
129 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
130 {
131 	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
132 }
133 
134 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
135 {
136 	scm->secid = *UNIXSID(skb);
137 }
138 #else
139 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 { }
141 
142 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143 { }
144 #endif /* CONFIG_SECURITY_NETWORK */
145 
146 /*
147  *  SMP locking strategy:
148  *    the hash table is protected by the spinlock unix_table_lock
149  *    each socket's state is protected by a separate spin lock.
150  */
151 
152 static inline unsigned unix_hash_fold(__wsum n)
153 {
154 	unsigned hash = (__force unsigned)n;
155 	hash ^= hash>>16;
156 	hash ^= hash>>8;
157 	return hash&(UNIX_HASH_SIZE-1);
158 }
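
/*
 * Editor's note: a worked example of the fold above, assuming the usual
 * UNIX_HASH_SIZE of 256. For n = 0x12345678: hash ^= hash>>16 gives
 * 0x1234444c, hash ^= hash>>8 gives 0x12267008, and masking with
 * UNIX_HASH_SIZE-1 leaves bucket 0x08.
 */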
159 
160 #define unix_peer(sk) (unix_sk(sk)->peer)
161 
162 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
163 {
164 	return unix_peer(osk) == sk;
165 }
166 
167 static inline int unix_may_send(struct sock *sk, struct sock *osk)
168 {
169 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
170 }
171 
172 static inline int unix_recvq_full(struct sock const *sk)
173 {
174 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
175 }
176 
177 struct sock *unix_peer_get(struct sock *s)
178 {
179 	struct sock *peer;
180 
181 	unix_state_lock(s);
182 	peer = unix_peer(s);
183 	if (peer)
184 		sock_hold(peer);
185 	unix_state_unlock(s);
186 	return peer;
187 }
188 EXPORT_SYMBOL_GPL(unix_peer_get);
189 
190 static inline void unix_release_addr(struct unix_address *addr)
191 {
192 	if (atomic_dec_and_test(&addr->refcnt))
193 		kfree(addr);
194 }
195 
196 /*
197  *	Check unix socket name:
198  *		- should not be zero length.
199  *		- if it does not start with a zero byte, it must be NUL terminated (FS object)
200  *		- if it starts with a zero byte, it is an abstract name.
201  */
202 
203 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
204 {
205 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
206 		return -EINVAL;
207 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
208 		return -EINVAL;
209 	if (sunaddr->sun_path[0]) {
210 		/*
211 		 * This may look like an off-by-one error but it is a bit more
212 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
213 		 * sun_path[108] does not exist as such.  However, in kernel space
214 		 * we are guaranteed that it is a valid memory location in our
215 		 * kernel address buffer.
216 		 */
217 		((char *)sunaddr)[len] = 0;
218 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
219 		return len;
220 	}
221 
222 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
223 	return len;
224 }
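
/*
 * Editor's note, a worked example of the return value: for a bind to
 * "/tmp/x", strlen() is 6, so unix_mkname() returns 6 + 1 + sizeof(short)
 * = 9 - the length actually stored in the address and later compared,
 * regardless of how large a sockaddr_un the caller passed in.
 */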
225 
226 static void __unix_remove_socket(struct sock *sk)
227 {
228 	sk_del_node_init(sk);
229 }
230 
231 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
232 {
233 	WARN_ON(!sk_unhashed(sk));
234 	sk_add_node(sk, list);
235 }
236 
237 static inline void unix_remove_socket(struct sock *sk)
238 {
239 	spin_lock(&unix_table_lock);
240 	__unix_remove_socket(sk);
241 	spin_unlock(&unix_table_lock);
242 }
243 
244 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
245 {
246 	spin_lock(&unix_table_lock);
247 	__unix_insert_socket(list, sk);
248 	spin_unlock(&unix_table_lock);
249 }
250 
251 static struct sock *__unix_find_socket_byname(struct net *net,
252 					      struct sockaddr_un *sunname,
253 					      int len, int type, unsigned hash)
254 {
255 	struct sock *s;
256 	struct hlist_node *node;
257 
258 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
259 		struct unix_sock *u = unix_sk(s);
260 
261 		if (!net_eq(sock_net(s), net))
262 			continue;
263 
264 		if (u->addr->len == len &&
265 		    !memcmp(u->addr->name, sunname, len))
266 			goto found;
267 	}
268 	s = NULL;
269 found:
270 	return s;
271 }
272 
273 static inline struct sock *unix_find_socket_byname(struct net *net,
274 						   struct sockaddr_un *sunname,
275 						   int len, int type,
276 						   unsigned hash)
277 {
278 	struct sock *s;
279 
280 	spin_lock(&unix_table_lock);
281 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
282 	if (s)
283 		sock_hold(s);
284 	spin_unlock(&unix_table_lock);
285 	return s;
286 }
287 
288 static struct sock *unix_find_socket_byinode(struct inode *i)
289 {
290 	struct sock *s;
291 	struct hlist_node *node;
292 
293 	spin_lock(&unix_table_lock);
294 	sk_for_each(s, node,
295 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
296 		struct dentry *dentry = unix_sk(s)->path.dentry;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
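/* writable while committed write-queue memory stays within a quarter of sk_sndbuf */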
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
314 static void unix_write_space(struct sock *sk)
315 {
316 	struct socket_wq *wq;
317 
318 	rcu_read_lock();
319 	if (unix_writable(sk)) {
320 		wq = rcu_dereference(sk->sk_wq);
321 		if (wq_has_sleeper(wq))
322 			wake_up_interruptible_sync_poll(&wq->wait,
323 				POLLOUT | POLLWRNORM | POLLWRBAND);
324 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
325 	}
326 	rcu_read_unlock();
327 }
328 
329 /* When a dgram socket disconnects (or changes its peer), we clear its receive
330  * queue of packets that arrived from the previous peer. First, this allows us
331  * to do flow control based only on wmem_alloc; second, an sk connected to a
332  * peer may receive messages only from that peer. */
333 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
334 {
335 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
336 		skb_queue_purge(&sk->sk_receive_queue);
337 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
338 
339 		/* If one link of a bidirectional dgram pipe is disconnected,
340 		 * we signal an error. Messages are lost. Do not do this
341 		 * when the peer was not connected to us.
342 		 */
343 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
344 			other->sk_err = ECONNRESET;
345 			other->sk_error_report(other);
346 		}
347 	}
348 }
349 
350 static void unix_sock_destructor(struct sock *sk)
351 {
352 	struct unix_sock *u = unix_sk(sk);
353 
354 	skb_queue_purge(&sk->sk_receive_queue);
355 
356 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
357 	WARN_ON(!sk_unhashed(sk));
358 	WARN_ON(sk->sk_socket);
359 	if (!sock_flag(sk, SOCK_DEAD)) {
360 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
361 		return;
362 	}
363 
364 	if (u->addr)
365 		unix_release_addr(u->addr);
366 
367 	atomic_long_dec(&unix_nr_socks);
368 	local_bh_disable();
369 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
370 	local_bh_enable();
371 #ifdef UNIX_REFCNT_DEBUG
372 	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
373 		atomic_long_read(&unix_nr_socks));
374 #endif
375 }
376 
377 static int unix_release_sock(struct sock *sk, int embrion)
378 {
379 	struct unix_sock *u = unix_sk(sk);
380 	struct path path;
381 	struct sock *skpair;
382 	struct sk_buff *skb;
383 	int state;
384 
385 	unix_remove_socket(sk);
386 
387 	/* Clear state */
388 	unix_state_lock(sk);
389 	sock_orphan(sk);
390 	sk->sk_shutdown = SHUTDOWN_MASK;
391 	path	     = u->path;
392 	u->path.dentry = NULL;
393 	u->path.mnt = NULL;
394 	state = sk->sk_state;
395 	sk->sk_state = TCP_CLOSE;
396 	unix_state_unlock(sk);
397 
398 	wake_up_interruptible_all(&u->peer_wait);
399 
400 	skpair = unix_peer(sk);
401 
402 	if (skpair != NULL) {
403 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
404 			unix_state_lock(skpair);
405 			/* No more writes */
406 			skpair->sk_shutdown = SHUTDOWN_MASK;
407 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
408 				skpair->sk_err = ECONNRESET;
409 			unix_state_unlock(skpair);
410 			skpair->sk_state_change(skpair);
411 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
412 		}
413 		sock_put(skpair); /* It may now die */
414 		unix_peer(sk) = NULL;
415 	}
416 
417 	/* Try to flush out this socket. Throw out buffers at least */
418 
419 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
420 		if (state == TCP_LISTEN)
421 			unix_release_sock(skb->sk, 1);
422 		/* passed fds are erased in the kfree_skb hook	      */
423 		kfree_skb(skb);
424 	}
425 
426 	if (path.dentry)
427 		path_put(&path);
428 
429 	sock_put(sk);
430 
431 	/* ---- Socket is dead now and most probably destroyed ---- */
432 
433 	/*
434 	 * Fixme: BSD difference: In BSD all sockets connected to us get
435 	 *	  ECONNRESET and we die on the spot. In Linux we behave
436 	 *	  like files and pipes do and wait for the last
437 	 *	  dereference.
438 	 *
439 	 * Can't we simply set sock->err?
440 	 *
441 	 *	  What is the above comment talking about? --ANK(980817)
442 	 */
443 
444 	if (unix_tot_inflight)
445 		unix_gc();		/* Garbage collect fds */
446 
447 	return 0;
448 }
449 
450 static void init_peercred(struct sock *sk)
451 {
452 	put_pid(sk->sk_peer_pid);
453 	if (sk->sk_peer_cred)
454 		put_cred(sk->sk_peer_cred);
455 	sk->sk_peer_pid  = get_pid(task_tgid(current));
456 	sk->sk_peer_cred = get_current_cred();
457 }
458 
459 static void copy_peercred(struct sock *sk, struct sock *peersk)
460 {
461 	put_pid(sk->sk_peer_pid);
462 	if (sk->sk_peer_cred)
463 		put_cred(sk->sk_peer_cred);
464 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
465 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
466 }
467 
468 static int unix_listen(struct socket *sock, int backlog)
469 {
470 	int err;
471 	struct sock *sk = sock->sk;
472 	struct unix_sock *u = unix_sk(sk);
473 	struct pid *old_pid = NULL;
474 	const struct cred *old_cred = NULL;
475 
476 	err = -EOPNOTSUPP;
477 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
478 		goto out;	/* Only stream/seqpacket sockets accept */
479 	err = -EINVAL;
480 	if (!u->addr)
481 		goto out;	/* No listens on an unbound socket */
482 	unix_state_lock(sk);
483 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
484 		goto out_unlock;
485 	if (backlog > sk->sk_max_ack_backlog)
486 		wake_up_interruptible_all(&u->peer_wait);
487 	sk->sk_max_ack_backlog	= backlog;
488 	sk->sk_state		= TCP_LISTEN;
489 	/* set credentials so connect can copy them */
490 	init_peercred(sk);
491 	err = 0;
492 
493 out_unlock:
494 	unix_state_unlock(sk);
495 	put_pid(old_pid);
496 	if (old_cred)
497 		put_cred(old_cred);
498 out:
499 	return err;
500 }
501 
502 static int unix_release(struct socket *);
503 static int unix_bind(struct socket *, struct sockaddr *, int);
504 static int unix_stream_connect(struct socket *, struct sockaddr *,
505 			       int addr_len, int flags);
506 static int unix_socketpair(struct socket *, struct socket *);
507 static int unix_accept(struct socket *, struct socket *, int);
508 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
509 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
510 static unsigned int unix_dgram_poll(struct file *, struct socket *,
511 				    poll_table *);
512 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
513 static int unix_shutdown(struct socket *, int);
514 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
515 			       struct msghdr *, size_t);
516 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
517 			       struct msghdr *, size_t, int);
518 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
519 			      struct msghdr *, size_t);
520 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
521 			      struct msghdr *, size_t, int);
522 static int unix_dgram_connect(struct socket *, struct sockaddr *,
523 			      int, int);
524 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
525 				  struct msghdr *, size_t);
526 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
527 				  struct msghdr *, size_t, int);
528 
529 static void unix_set_peek_off(struct sock *sk, int val)
530 {
531 	struct unix_sock *u = unix_sk(sk);
532 
533 	mutex_lock(&u->readlock);
534 	sk->sk_peek_off = val;
535 	mutex_unlock(&u->readlock);
536 }
537 
538 
539 static const struct proto_ops unix_stream_ops = {
540 	.family =	PF_UNIX,
541 	.owner =	THIS_MODULE,
542 	.release =	unix_release,
543 	.bind =		unix_bind,
544 	.connect =	unix_stream_connect,
545 	.socketpair =	unix_socketpair,
546 	.accept =	unix_accept,
547 	.getname =	unix_getname,
548 	.poll =		unix_poll,
549 	.ioctl =	unix_ioctl,
550 	.listen =	unix_listen,
551 	.shutdown =	unix_shutdown,
552 	.setsockopt =	sock_no_setsockopt,
553 	.getsockopt =	sock_no_getsockopt,
554 	.sendmsg =	unix_stream_sendmsg,
555 	.recvmsg =	unix_stream_recvmsg,
556 	.mmap =		sock_no_mmap,
557 	.sendpage =	sock_no_sendpage,
558 	.set_peek_off =	unix_set_peek_off,
559 };
560 
561 static const struct proto_ops unix_dgram_ops = {
562 	.family =	PF_UNIX,
563 	.owner =	THIS_MODULE,
564 	.release =	unix_release,
565 	.bind =		unix_bind,
566 	.connect =	unix_dgram_connect,
567 	.socketpair =	unix_socketpair,
568 	.accept =	sock_no_accept,
569 	.getname =	unix_getname,
570 	.poll =		unix_dgram_poll,
571 	.ioctl =	unix_ioctl,
572 	.listen =	sock_no_listen,
573 	.shutdown =	unix_shutdown,
574 	.setsockopt =	sock_no_setsockopt,
575 	.getsockopt =	sock_no_getsockopt,
576 	.sendmsg =	unix_dgram_sendmsg,
577 	.recvmsg =	unix_dgram_recvmsg,
578 	.mmap =		sock_no_mmap,
579 	.sendpage =	sock_no_sendpage,
580 	.set_peek_off =	unix_set_peek_off,
581 };
582 
583 static const struct proto_ops unix_seqpacket_ops = {
584 	.family =	PF_UNIX,
585 	.owner =	THIS_MODULE,
586 	.release =	unix_release,
587 	.bind =		unix_bind,
588 	.connect =	unix_stream_connect,
589 	.socketpair =	unix_socketpair,
590 	.accept =	unix_accept,
591 	.getname =	unix_getname,
592 	.poll =		unix_dgram_poll,
593 	.ioctl =	unix_ioctl,
594 	.listen =	unix_listen,
595 	.shutdown =	unix_shutdown,
596 	.setsockopt =	sock_no_setsockopt,
597 	.getsockopt =	sock_no_getsockopt,
598 	.sendmsg =	unix_seqpacket_sendmsg,
599 	.recvmsg =	unix_seqpacket_recvmsg,
600 	.mmap =		sock_no_mmap,
601 	.sendpage =	sock_no_sendpage,
602 	.set_peek_off =	unix_set_peek_off,
603 };
604 
605 static struct proto unix_proto = {
606 	.name			= "UNIX",
607 	.owner			= THIS_MODULE,
608 	.obj_size		= sizeof(struct unix_sock),
609 };
610 
611 /*
612  * AF_UNIX sockets do not interact with hardware, hence they
613  * don't trigger interrupts - so it's safe for them to have
614  * bh-unsafe locking for their sk_receive_queue.lock. Split off
615  * this special lock-class by reinitializing the spinlock key:
616  */
617 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
618 
619 static struct sock *unix_create1(struct net *net, struct socket *sock)
620 {
621 	struct sock *sk = NULL;
622 	struct unix_sock *u;
623 
624 	atomic_long_inc(&unix_nr_socks);
625 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
626 		goto out;
627 
628 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
629 	if (!sk)
630 		goto out;
631 
632 	sock_init_data(sock, sk);
633 	lockdep_set_class(&sk->sk_receive_queue.lock,
634 				&af_unix_sk_receive_queue_lock_key);
635 
636 	sk->sk_write_space	= unix_write_space;
637 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
638 	sk->sk_destruct		= unix_sock_destructor;
639 	u	  = unix_sk(sk);
640 	u->path.dentry = NULL;
641 	u->path.mnt = NULL;
642 	spin_lock_init(&u->lock);
643 	atomic_long_set(&u->inflight, 0);
644 	INIT_LIST_HEAD(&u->link);
645 	mutex_init(&u->readlock); /* single task reading lock */
646 	init_waitqueue_head(&u->peer_wait);
647 	unix_insert_socket(unix_sockets_unbound, sk);
648 out:
649 	if (sk == NULL)
650 		atomic_long_dec(&unix_nr_socks);
651 	else {
652 		local_bh_disable();
653 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
654 		local_bh_enable();
655 	}
656 	return sk;
657 }
658 
659 static int unix_create(struct net *net, struct socket *sock, int protocol,
660 		       int kern)
661 {
662 	if (protocol && protocol != PF_UNIX)
663 		return -EPROTONOSUPPORT;
664 
665 	sock->state = SS_UNCONNECTED;
666 
667 	switch (sock->type) {
668 	case SOCK_STREAM:
669 		sock->ops = &unix_stream_ops;
670 		break;
671 		/*
672 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
673 		 *	nothing uses it.
674 		 */
675 	case SOCK_RAW:
676 		sock->type = SOCK_DGRAM;
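		/* fall through: treat SOCK_RAW as SOCK_DGRAM */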
677 	case SOCK_DGRAM:
678 		sock->ops = &unix_dgram_ops;
679 		break;
680 	case SOCK_SEQPACKET:
681 		sock->ops = &unix_seqpacket_ops;
682 		break;
683 	default:
684 		return -ESOCKTNOSUPPORT;
685 	}
686 
687 	return unix_create1(net, sock) ? 0 : -ENOMEM;
688 }
689 
690 static int unix_release(struct socket *sock)
691 {
692 	struct sock *sk = sock->sk;
693 
694 	if (!sk)
695 		return 0;
696 
697 	sock->sk = NULL;
698 
699 	return unix_release_sock(sk, 0);
700 }
701 
702 static int unix_autobind(struct socket *sock)
703 {
704 	struct sock *sk = sock->sk;
705 	struct net *net = sock_net(sk);
706 	struct unix_sock *u = unix_sk(sk);
707 	static u32 ordernum = 1;
708 	struct unix_address *addr;
709 	int err;
710 	unsigned int retries = 0;
711 
712 	mutex_lock(&u->readlock);
713 
714 	err = 0;
715 	if (u->addr)
716 		goto out;
717 
718 	err = -ENOMEM;
719 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
720 	if (!addr)
721 		goto out;
722 
723 	addr->name->sun_family = AF_UNIX;
724 	atomic_set(&addr->refcnt, 1);
725 
726 retry:
727 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
728 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
729 
730 	spin_lock(&unix_table_lock);
731 	ordernum = (ordernum+1)&0xFFFFF;
732 
733 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
734 				      addr->hash)) {
735 		spin_unlock(&unix_table_lock);
736 		/*
737 	 * __unix_find_socket_byname() may take a long time if many names
738 		 * are already in use.
739 		 */
740 		cond_resched();
741 		/* Give up if all names seem to be in use. */
742 		if (retries++ == 0xFFFFF) {
743 			err = -ENOSPC;
744 			kfree(addr);
745 			goto out;
746 		}
747 		goto retry;
748 	}
749 	addr->hash ^= sk->sk_type;
750 
751 	__unix_remove_socket(sk);
752 	u->addr = addr;
753 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
754 	spin_unlock(&unix_table_lock);
755 	err = 0;
756 
757 out:	mutex_unlock(&u->readlock);
758 	return err;
759 }
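
/*
 * Editor's note: autobind gives the socket an abstract name consisting of
 * a leading zero byte followed by five hex digits (tools such as netstat
 * display the zero byte as '@'). Userspace triggers it implicitly, e.g.
 * by sending on an unbound SOCK_PASSCRED socket, or by calling bind()
 * with addr_len == sizeof(short), as checked in unix_bind() below.
 */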
760 
761 static struct sock *unix_find_other(struct net *net,
762 				    struct sockaddr_un *sunname, int len,
763 				    int type, unsigned hash, int *error)
764 {
765 	struct sock *u;
766 	struct path path;
767 	int err = 0;
768 
769 	if (sunname->sun_path[0]) {
770 		struct inode *inode;
771 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
772 		if (err)
773 			goto fail;
774 		inode = path.dentry->d_inode;
775 		err = inode_permission(inode, MAY_WRITE);
776 		if (err)
777 			goto put_fail;
778 
779 		err = -ECONNREFUSED;
780 		if (!S_ISSOCK(inode->i_mode))
781 			goto put_fail;
782 		u = unix_find_socket_byinode(inode);
783 		if (!u)
784 			goto put_fail;
785 
786 		if (u->sk_type == type)
787 			touch_atime(&path);
788 
789 		path_put(&path);
790 
791 		err = -EPROTOTYPE;
792 		if (u->sk_type != type) {
793 			sock_put(u);
794 			goto fail;
795 		}
796 	} else {
797 		err = -ECONNREFUSED;
798 		u = unix_find_socket_byname(net, sunname, len, type, hash);
799 		if (u) {
800 			struct dentry *dentry;
801 			dentry = unix_sk(u)->path.dentry;
802 			if (dentry)
803 				touch_atime(&unix_sk(u)->path);
804 		} else
805 			goto fail;
806 	}
807 	return u;
808 
809 put_fail:
810 	path_put(&path);
811 fail:
812 	*error = err;
813 	return NULL;
814 }
815 
816 
817 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
818 {
819 	struct sock *sk = sock->sk;
820 	struct net *net = sock_net(sk);
821 	struct unix_sock *u = unix_sk(sk);
822 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
823 	char *sun_path = sunaddr->sun_path;
824 	struct dentry *dentry = NULL;
825 	struct path path;
826 	int err;
827 	unsigned hash;
828 	struct unix_address *addr;
829 	struct hlist_head *list;
830 
831 	err = -EINVAL;
832 	if (sunaddr->sun_family != AF_UNIX)
833 		goto out;
834 
835 	if (addr_len == sizeof(short)) {
836 		err = unix_autobind(sock);
837 		goto out;
838 	}
839 
840 	err = unix_mkname(sunaddr, addr_len, &hash);
841 	if (err < 0)
842 		goto out;
843 	addr_len = err;
844 
845 	mutex_lock(&u->readlock);
846 
847 	err = -EINVAL;
848 	if (u->addr)
849 		goto out_up;
850 
851 	err = -ENOMEM;
852 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
853 	if (!addr)
854 		goto out_up;
855 
856 	memcpy(addr->name, sunaddr, addr_len);
857 	addr->len = addr_len;
858 	addr->hash = hash ^ sk->sk_type;
859 	atomic_set(&addr->refcnt, 1);
860 
861 	if (sun_path[0]) {
862 		umode_t mode;
863 		err = 0;
864 		/*
865 		 * Get the parent directory and calculate the hash for the
866 		 * last component.
867 		 */
868 		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
869 		err = PTR_ERR(dentry);
870 		if (IS_ERR(dentry))
871 			goto out_mknod_parent;
872 
873 		/*
874 		 * All right, let's create it.
875 		 */
876 		mode = S_IFSOCK |
877 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
878 		err = mnt_want_write(path.mnt);
879 		if (err)
880 			goto out_mknod_dput;
881 		err = security_path_mknod(&path, dentry, mode, 0);
882 		if (err)
883 			goto out_mknod_drop_write;
884 		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
885 out_mknod_drop_write:
886 		mnt_drop_write(path.mnt);
887 		if (err)
888 			goto out_mknod_dput;
889 		mutex_unlock(&path.dentry->d_inode->i_mutex);
890 		dput(path.dentry);
891 		path.dentry = dentry;
892 
893 		addr->hash = UNIX_HASH_SIZE;
894 	}
895 
896 	spin_lock(&unix_table_lock);
897 
898 	if (!sun_path[0]) {
899 		err = -EADDRINUSE;
900 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
901 					      sk->sk_type, hash)) {
902 			unix_release_addr(addr);
903 			goto out_unlock;
904 		}
905 
906 		list = &unix_socket_table[addr->hash];
907 	} else {
908 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
909 		u->path = path;
910 	}
911 
912 	err = 0;
913 	__unix_remove_socket(sk);
914 	u->addr = addr;
915 	__unix_insert_socket(list, sk);
916 
917 out_unlock:
918 	spin_unlock(&unix_table_lock);
919 out_up:
920 	mutex_unlock(&u->readlock);
921 out:
922 	return err;
923 
924 out_mknod_dput:
925 	dput(dentry);
926 	mutex_unlock(&path.dentry->d_inode->i_mutex);
927 	path_put(&path);
928 out_mknod_parent:
929 	if (err == -EEXIST)
930 		err = -EADDRINUSE;
931 	unix_release_addr(addr);
932 	goto out_up;
933 }
934 
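/* Take both state locks in pointer order, so two tasks connecting the same
 * pair of sockets in opposite directions cannot ABBA-deadlock. */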
935 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
936 {
937 	if (unlikely(sk1 == sk2) || !sk2) {
938 		unix_state_lock(sk1);
939 		return;
940 	}
941 	if (sk1 < sk2) {
942 		unix_state_lock(sk1);
943 		unix_state_lock_nested(sk2);
944 	} else {
945 		unix_state_lock(sk2);
946 		unix_state_lock_nested(sk1);
947 	}
948 }
949 
950 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
951 {
952 	if (unlikely(sk1 == sk2) || !sk2) {
953 		unix_state_unlock(sk1);
954 		return;
955 	}
956 	unix_state_unlock(sk1);
957 	unix_state_unlock(sk2);
958 }
959 
960 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
961 			      int alen, int flags)
962 {
963 	struct sock *sk = sock->sk;
964 	struct net *net = sock_net(sk);
965 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
966 	struct sock *other;
967 	unsigned hash;
968 	int err;
969 
970 	if (addr->sa_family != AF_UNSPEC) {
971 		err = unix_mkname(sunaddr, alen, &hash);
972 		if (err < 0)
973 			goto out;
974 		alen = err;
975 
976 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
977 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
978 			goto out;
979 
980 restart:
981 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
982 		if (!other)
983 			goto out;
984 
985 		unix_state_double_lock(sk, other);
986 
987 		/* Apparently VFS overslept socket death. Retry. */
988 		if (sock_flag(other, SOCK_DEAD)) {
989 			unix_state_double_unlock(sk, other);
990 			sock_put(other);
991 			goto restart;
992 		}
993 
994 		err = -EPERM;
995 		if (!unix_may_send(sk, other))
996 			goto out_unlock;
997 
998 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
999 		if (err)
1000 			goto out_unlock;
1001 
1002 	} else {
1003 		/*
1004 		 *	1003.1g breaking connected state with AF_UNSPEC
1005 		 */
1006 		other = NULL;
1007 		unix_state_double_lock(sk, other);
1008 	}
1009 
1010 	/*
1011 	 * If it was connected, reconnect.
1012 	 */
1013 	if (unix_peer(sk)) {
1014 		struct sock *old_peer = unix_peer(sk);
1015 		unix_peer(sk) = other;
1016 		unix_state_double_unlock(sk, other);
1017 
1018 		if (other != old_peer)
1019 			unix_dgram_disconnected(sk, old_peer);
1020 		sock_put(old_peer);
1021 	} else {
1022 		unix_peer(sk) = other;
1023 		unix_state_double_unlock(sk, other);
1024 	}
1025 	return 0;
1026 
1027 out_unlock:
1028 	unix_state_double_unlock(sk, other);
1029 	sock_put(other);
1030 out:
1031 	return err;
1032 }
1033 
1034 static long unix_wait_for_peer(struct sock *other, long timeo)
1035 {
1036 	struct unix_sock *u = unix_sk(other);
1037 	int sched;
1038 	DEFINE_WAIT(wait);
1039 
1040 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1041 
1042 	sched = !sock_flag(other, SOCK_DEAD) &&
1043 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1044 		unix_recvq_full(other);
1045 
1046 	unix_state_unlock(other);
1047 
1048 	if (sched)
1049 		timeo = schedule_timeout(timeo);
1050 
1051 	finish_wait(&u->peer_wait, &wait);
1052 	return timeo;
1053 }
1054 
1055 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1056 			       int addr_len, int flags)
1057 {
1058 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1059 	struct sock *sk = sock->sk;
1060 	struct net *net = sock_net(sk);
1061 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1062 	struct sock *newsk = NULL;
1063 	struct sock *other = NULL;
1064 	struct sk_buff *skb = NULL;
1065 	unsigned hash;
1066 	int st;
1067 	int err;
1068 	long timeo;
1069 
1070 	err = unix_mkname(sunaddr, addr_len, &hash);
1071 	if (err < 0)
1072 		goto out;
1073 	addr_len = err;
1074 
1075 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1076 	    (err = unix_autobind(sock)) != 0)
1077 		goto out;
1078 
1079 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1080 
1081 	/* First of all allocate resources.
1082 	   If we do it after the state is locked,
1083 	   we will have to recheck everything again in any case.
1084 	 */
1085 
1086 	err = -ENOMEM;
1087 
1088 	/* create new sock for complete connection */
1089 	newsk = unix_create1(sock_net(sk), NULL);
1090 	if (newsk == NULL)
1091 		goto out;
1092 
1093 	/* Allocate skb for sending to listening sock */
1094 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1095 	if (skb == NULL)
1096 		goto out;
1097 
1098 restart:
1099 	/*  Find listening sock. */
1100 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1101 	if (!other)
1102 		goto out;
1103 
1104 	/* Latch state of peer */
1105 	unix_state_lock(other);
1106 
1107 	/* Apparently VFS overslept socket death. Retry. */
1108 	if (sock_flag(other, SOCK_DEAD)) {
1109 		unix_state_unlock(other);
1110 		sock_put(other);
1111 		goto restart;
1112 	}
1113 
1114 	err = -ECONNREFUSED;
1115 	if (other->sk_state != TCP_LISTEN)
1116 		goto out_unlock;
1117 	if (other->sk_shutdown & RCV_SHUTDOWN)
1118 		goto out_unlock;
1119 
1120 	if (unix_recvq_full(other)) {
1121 		err = -EAGAIN;
1122 		if (!timeo)
1123 			goto out_unlock;
1124 
1125 		timeo = unix_wait_for_peer(other, timeo);
1126 
1127 		err = sock_intr_errno(timeo);
1128 		if (signal_pending(current))
1129 			goto out;
1130 		sock_put(other);
1131 		goto restart;
1132 	}
1133 
1134 	/* Latch our state.
1135 
1136 	   This is a tricky place. We need to grab our state lock and cannot
1137 	   drop the lock on the peer. It is dangerous because a deadlock is
1138 	   possible. The connect-to-self case and simultaneous
1139 	   connect attempts are eliminated by checking the socket
1140 	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
1141 	   check this before attempting to grab the lock.
1142 
1143 	   And we still have to recheck the state after the socket is locked.
1144 	 */
1145 	st = sk->sk_state;
1146 
1147 	switch (st) {
1148 	case TCP_CLOSE:
1149 		/* This is ok... continue with connect */
1150 		break;
1151 	case TCP_ESTABLISHED:
1152 		/* Socket is already connected */
1153 		err = -EISCONN;
1154 		goto out_unlock;
1155 	default:
1156 		err = -EINVAL;
1157 		goto out_unlock;
1158 	}
1159 
1160 	unix_state_lock_nested(sk);
1161 
1162 	if (sk->sk_state != st) {
1163 		unix_state_unlock(sk);
1164 		unix_state_unlock(other);
1165 		sock_put(other);
1166 		goto restart;
1167 	}
1168 
1169 	err = security_unix_stream_connect(sk, other, newsk);
1170 	if (err) {
1171 		unix_state_unlock(sk);
1172 		goto out_unlock;
1173 	}
1174 
1175 	/* The way is open! Quickly set all the necessary fields... */
1176 
1177 	sock_hold(sk);
1178 	unix_peer(newsk)	= sk;
1179 	newsk->sk_state		= TCP_ESTABLISHED;
1180 	newsk->sk_type		= sk->sk_type;
1181 	init_peercred(newsk);
1182 	newu = unix_sk(newsk);
1183 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1184 	otheru = unix_sk(other);
1185 
1186 	/* copy address information from listening to new sock */
1187 	if (otheru->addr) {
1188 		atomic_inc(&otheru->addr->refcnt);
1189 		newu->addr = otheru->addr;
1190 	}
1191 	if (otheru->path.dentry) {
1192 		path_get(&otheru->path);
1193 		newu->path = otheru->path;
1194 	}
1195 
1196 	/* Set credentials */
1197 	copy_peercred(sk, other);
1198 
1199 	sock->state	= SS_CONNECTED;
1200 	sk->sk_state	= TCP_ESTABLISHED;
1201 	sock_hold(newsk);
1202 
1203 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1204 	unix_peer(sk)	= newsk;
1205 
1206 	unix_state_unlock(sk);
1207 
1208 	/* queue the skb and send info to the listening sock */
1209 	spin_lock(&other->sk_receive_queue.lock);
1210 	__skb_queue_tail(&other->sk_receive_queue, skb);
1211 	spin_unlock(&other->sk_receive_queue.lock);
1212 	unix_state_unlock(other);
1213 	other->sk_data_ready(other, 0);
1214 	sock_put(other);
1215 	return 0;
1216 
1217 out_unlock:
1218 	if (other)
1219 		unix_state_unlock(other);
1220 
1221 out:
1222 	kfree_skb(skb);
1223 	if (newsk)
1224 		unix_release_sock(newsk, 0);
1225 	if (other)
1226 		sock_put(other);
1227 	return err;
1228 }
1229 
1230 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1231 {
1232 	struct sock *ska = socka->sk, *skb = sockb->sk;
1233 
1234 	/* Join our sockets back to back */
1235 	sock_hold(ska);
1236 	sock_hold(skb);
1237 	unix_peer(ska) = skb;
1238 	unix_peer(skb) = ska;
1239 	init_peercred(ska);
1240 	init_peercred(skb);
1241 
1242 	if (ska->sk_type != SOCK_DGRAM) {
1243 		ska->sk_state = TCP_ESTABLISHED;
1244 		skb->sk_state = TCP_ESTABLISHED;
1245 		socka->state  = SS_CONNECTED;
1246 		sockb->state  = SS_CONNECTED;
1247 	}
1248 	return 0;
1249 }
1250 
1251 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1252 {
1253 	struct sock *sk = sock->sk;
1254 	struct sock *tsk;
1255 	struct sk_buff *skb;
1256 	int err;
1257 
1258 	err = -EOPNOTSUPP;
1259 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1260 		goto out;
1261 
1262 	err = -EINVAL;
1263 	if (sk->sk_state != TCP_LISTEN)
1264 		goto out;
1265 
1266 	/* If the socket state is TCP_LISTEN it cannot change (for now...),
1267 	 * so no locks are necessary.
1268 	 */
1269 
1270 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1271 	if (!skb) {
1272 		/* This means receive shutdown. */
1273 		if (err == 0)
1274 			err = -EINVAL;
1275 		goto out;
1276 	}
1277 
1278 	tsk = skb->sk;
1279 	skb_free_datagram(sk, skb);
1280 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1281 
1282 	/* attach accepted sock to socket */
1283 	unix_state_lock(tsk);
1284 	newsock->state = SS_CONNECTED;
1285 	sock_graft(tsk, newsock);
1286 	unix_state_unlock(tsk);
1287 	return 0;
1288 
1289 out:
1290 	return err;
1291 }
1292 
1293 
1294 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1295 {
1296 	struct sock *sk = sock->sk;
1297 	struct unix_sock *u;
1298 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1299 	int err = 0;
1300 
1301 	if (peer) {
1302 		sk = unix_peer_get(sk);
1303 
1304 		err = -ENOTCONN;
1305 		if (!sk)
1306 			goto out;
1307 		err = 0;
1308 	} else {
1309 		sock_hold(sk);
1310 	}
1311 
1312 	u = unix_sk(sk);
1313 	unix_state_lock(sk);
1314 	if (!u->addr) {
1315 		sunaddr->sun_family = AF_UNIX;
1316 		sunaddr->sun_path[0] = 0;
1317 		*uaddr_len = sizeof(short);
1318 	} else {
1319 		struct unix_address *addr = u->addr;
1320 
1321 		*uaddr_len = addr->len;
1322 		memcpy(sunaddr, addr->name, *uaddr_len);
1323 	}
1324 	unix_state_unlock(sk);
1325 	sock_put(sk);
1326 out:
1327 	return err;
1328 }
1329 
1330 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1331 {
1332 	int i;
1333 
1334 	scm->fp = UNIXCB(skb).fp;
1335 	UNIXCB(skb).fp = NULL;
1336 
1337 	for (i = scm->fp->count-1; i >= 0; i--)
1338 		unix_notinflight(scm->fp->fp[i]);
1339 }
1340 
1341 static void unix_destruct_scm(struct sk_buff *skb)
1342 {
1343 	struct scm_cookie scm;
1344 	memset(&scm, 0, sizeof(scm));
1345 	scm.pid  = UNIXCB(skb).pid;
1346 	scm.cred = UNIXCB(skb).cred;
1347 	if (UNIXCB(skb).fp)
1348 		unix_detach_fds(&scm, skb);
1349 
1350 	/* Alas, it calls VFS */
1351 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1352 	scm_destroy(&scm);
1353 	sock_wfree(skb);
1354 }
1355 
1356 #define MAX_RECURSION_LEVEL 4
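/* Bounds how deeply AF_UNIX sockets may be nested inside SCM_RIGHTS
 * messages queued on other AF_UNIX sockets; deeper chains get -ETOOMANYREFS
 * in unix_attach_fds() below. */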
1357 
1358 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1359 {
1360 	int i;
1361 	unsigned char max_level = 0;
1362 	int unix_sock_count = 0;
1363 
1364 	for (i = scm->fp->count - 1; i >= 0; i--) {
1365 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1366 
1367 		if (sk) {
1368 			unix_sock_count++;
1369 			max_level = max(max_level,
1370 					unix_sk(sk)->recursion_level);
1371 		}
1372 	}
1373 	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1374 		return -ETOOMANYREFS;
1375 
1376 	/*
1377 	 * Need to duplicate file references for the sake of garbage
1378 	 * collection.  Otherwise a socket in the fps might become a
1379 	 * candidate for GC while the skb is not yet queued.
1380 	 */
1381 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1382 	if (!UNIXCB(skb).fp)
1383 		return -ENOMEM;
1384 
1385 	if (unix_sock_count) {
1386 		for (i = scm->fp->count - 1; i >= 0; i--)
1387 			unix_inflight(scm->fp->fp[i]);
1388 	}
1389 	return max_level;
1390 }
1391 
1392 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1393 {
1394 	int err = 0;
1395 
1396 	UNIXCB(skb).pid  = get_pid(scm->pid);
1397 	if (scm->cred)
1398 		UNIXCB(skb).cred = get_cred(scm->cred);
1399 	UNIXCB(skb).fp = NULL;
1400 	if (scm->fp && send_fds)
1401 		err = unix_attach_fds(scm, skb);
1402 
1403 	skb->destructor = unix_destruct_scm;
1404 	return err;
1405 }
1406 
1407 /*
1408  * Some apps rely on write() giving SCM_CREDENTIALS.
1409  * We include credentials if the source or destination socket
1410  * asserted SOCK_PASSCRED.
1411  */
1412 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1413 			    const struct sock *other)
1414 {
1415 	if (UNIXCB(skb).cred)
1416 		return;
1417 	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1418 	    !other->sk_socket ||
1419 	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1420 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1421 		UNIXCB(skb).cred = get_current_cred();
1422 	}
1423 }
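
/*
 * Editor's illustration - a hedged receiver-side sketch of consuming the
 * credentials added above; error handling omitted:
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *	recvmsg(fd, &msg, 0);
 *
 * then, for each cmsg in the received control data: if cmsg_level ==
 * SOL_SOCKET and cmsg_type == SCM_CREDENTIALS, CMSG_DATA() holds a
 * struct ucred { pid_t pid; uid_t uid; gid_t gid; }.
 */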
1424 
1425 /*
1426  *	Send AF_UNIX data.
1427  */
1428 
1429 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1430 			      struct msghdr *msg, size_t len)
1431 {
1432 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1433 	struct sock *sk = sock->sk;
1434 	struct net *net = sock_net(sk);
1435 	struct unix_sock *u = unix_sk(sk);
1436 	struct sockaddr_un *sunaddr = msg->msg_name;
1437 	struct sock *other = NULL;
1438 	int namelen = 0; /* silence a bogus GCC "may be used uninitialized" warning */
1439 	int err;
1440 	unsigned hash;
1441 	struct sk_buff *skb;
1442 	long timeo;
1443 	struct scm_cookie tmp_scm;
1444 	int max_level;
1445 
1446 	if (NULL == siocb->scm)
1447 		siocb->scm = &tmp_scm;
1448 	wait_for_unix_gc();
1449 	err = scm_send(sock, msg, siocb->scm);
1450 	if (err < 0)
1451 		return err;
1452 
1453 	err = -EOPNOTSUPP;
1454 	if (msg->msg_flags&MSG_OOB)
1455 		goto out;
1456 
1457 	if (msg->msg_namelen) {
1458 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1459 		if (err < 0)
1460 			goto out;
1461 		namelen = err;
1462 	} else {
1463 		sunaddr = NULL;
1464 		err = -ENOTCONN;
1465 		other = unix_peer_get(sk);
1466 		if (!other)
1467 			goto out;
1468 	}
1469 
1470 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1471 	    && (err = unix_autobind(sock)) != 0)
1472 		goto out;
1473 
1474 	err = -EMSGSIZE;
1475 	if (len > sk->sk_sndbuf - 32)
1476 		goto out;
1477 
1478 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1479 	if (skb == NULL)
1480 		goto out;
1481 
1482 	err = unix_scm_to_skb(siocb->scm, skb, true);
1483 	if (err < 0)
1484 		goto out_free;
1485 	max_level = err + 1;
1486 	unix_get_secdata(siocb->scm, skb);
1487 
1488 	skb_reset_transport_header(skb);
1489 	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1490 	if (err)
1491 		goto out_free;
1492 
1493 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1494 
1495 restart:
1496 	if (!other) {
1497 		err = -ECONNRESET;
1498 		if (sunaddr == NULL)
1499 			goto out_free;
1500 
1501 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1502 					hash, &err);
1503 		if (other == NULL)
1504 			goto out_free;
1505 	}
1506 
1507 	if (sk_filter(other, skb) < 0) {
1508 		/* Toss the packet but do not return any error to the sender */
1509 		err = len;
1510 		goto out_free;
1511 	}
1512 
1513 	unix_state_lock(other);
1514 	err = -EPERM;
1515 	if (!unix_may_send(sk, other))
1516 		goto out_unlock;
1517 
1518 	if (sock_flag(other, SOCK_DEAD)) {
1519 		/*
1520 		 *	Check with 1003.1g - what should a
1521 		 *	datagram error do here?
1522 		 */
1523 		unix_state_unlock(other);
1524 		sock_put(other);
1525 
1526 		err = 0;
1527 		unix_state_lock(sk);
1528 		if (unix_peer(sk) == other) {
1529 			unix_peer(sk) = NULL;
1530 			unix_state_unlock(sk);
1531 
1532 			unix_dgram_disconnected(sk, other);
1533 			sock_put(other);
1534 			err = -ECONNREFUSED;
1535 		} else {
1536 			unix_state_unlock(sk);
1537 		}
1538 
1539 		other = NULL;
1540 		if (err)
1541 			goto out_free;
1542 		goto restart;
1543 	}
1544 
1545 	err = -EPIPE;
1546 	if (other->sk_shutdown & RCV_SHUTDOWN)
1547 		goto out_unlock;
1548 
1549 	if (sk->sk_type != SOCK_SEQPACKET) {
1550 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1551 		if (err)
1552 			goto out_unlock;
1553 	}
1554 
1555 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1556 		if (!timeo) {
1557 			err = -EAGAIN;
1558 			goto out_unlock;
1559 		}
1560 
1561 		timeo = unix_wait_for_peer(other, timeo);
1562 
1563 		err = sock_intr_errno(timeo);
1564 		if (signal_pending(current))
1565 			goto out_free;
1566 
1567 		goto restart;
1568 	}
1569 
1570 	if (sock_flag(other, SOCK_RCVTSTAMP))
1571 		__net_timestamp(skb);
1572 	maybe_add_creds(skb, sock, other);
1573 	skb_queue_tail(&other->sk_receive_queue, skb);
1574 	if (max_level > unix_sk(other)->recursion_level)
1575 		unix_sk(other)->recursion_level = max_level;
1576 	unix_state_unlock(other);
1577 	other->sk_data_ready(other, len);
1578 	sock_put(other);
1579 	scm_destroy(siocb->scm);
1580 	return len;
1581 
1582 out_unlock:
1583 	unix_state_unlock(other);
1584 out_free:
1585 	kfree_skb(skb);
1586 out:
1587 	if (other)
1588 		sock_put(other);
1589 	scm_destroy(siocb->scm);
1590 	return err;
1591 }
1592 
1593 
1594 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1595 			       struct msghdr *msg, size_t len)
1596 {
1597 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1598 	struct sock *sk = sock->sk;
1599 	struct sock *other = NULL;
1600 	int err, size;
1601 	struct sk_buff *skb;
1602 	int sent = 0;
1603 	struct scm_cookie tmp_scm;
1604 	bool fds_sent = false;
1605 	int max_level;
1606 
1607 	if (NULL == siocb->scm)
1608 		siocb->scm = &tmp_scm;
1609 	wait_for_unix_gc();
1610 	err = scm_send(sock, msg, siocb->scm);
1611 	if (err < 0)
1612 		return err;
1613 
1614 	err = -EOPNOTSUPP;
1615 	if (msg->msg_flags&MSG_OOB)
1616 		goto out_err;
1617 
1618 	if (msg->msg_namelen) {
1619 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1620 		goto out_err;
1621 	} else {
1622 		err = -ENOTCONN;
1623 		other = unix_peer(sk);
1624 		if (!other)
1625 			goto out_err;
1626 	}
1627 
1628 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1629 		goto pipe_err;
1630 
1631 	while (sent < len) {
1632 		/*
1633 		 *	Optimisation for the fact that under 0.01% of X
1634 		 *	messages typically need breaking up.
1635 		 */
1636 
1637 		size = len-sent;
1638 
1639 		/* Keep two messages in the pipe so it schedules better */
1640 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1641 			size = (sk->sk_sndbuf >> 1) - 64;
1642 
1643 		if (size > SKB_MAX_ALLOC)
1644 			size = SKB_MAX_ALLOC;
1645 
1646 		/*
1647 		 *	Grab a buffer
1648 		 */
1649 
1650 		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1651 					  &err);
1652 
1653 		if (skb == NULL)
1654 			goto out_err;
1655 
1656 		/*
1657 		 *	If you pass two values to sock_alloc_send_skb
1658 		 *	it tries to grab the large buffer with GFP_NOFS
1659 		 *	(which can fail easily), and if that fails it grabs the
1660 		 *	fallback size buffer, which is under a page and will
1661 		 *	succeed. [Alan]
1662 		 */
1663 		size = min_t(int, size, skb_tailroom(skb));
1664 
1665 
1666 		/* Only send the fds in the first buffer */
1667 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1668 		if (err < 0) {
1669 			kfree_skb(skb);
1670 			goto out_err;
1671 		}
1672 		max_level = err + 1;
1673 		fds_sent = true;
1674 
1675 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1676 		if (err) {
1677 			kfree_skb(skb);
1678 			goto out_err;
1679 		}
1680 
1681 		unix_state_lock(other);
1682 
1683 		if (sock_flag(other, SOCK_DEAD) ||
1684 		    (other->sk_shutdown & RCV_SHUTDOWN))
1685 			goto pipe_err_free;
1686 
1687 		maybe_add_creds(skb, sock, other);
1688 		skb_queue_tail(&other->sk_receive_queue, skb);
1689 		if (max_level > unix_sk(other)->recursion_level)
1690 			unix_sk(other)->recursion_level = max_level;
1691 		unix_state_unlock(other);
1692 		other->sk_data_ready(other, size);
1693 		sent += size;
1694 	}
1695 
1696 	scm_destroy(siocb->scm);
1697 	siocb->scm = NULL;
1698 
1699 	return sent;
1700 
1701 pipe_err_free:
1702 	unix_state_unlock(other);
1703 	kfree_skb(skb);
1704 pipe_err:
1705 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1706 		send_sig(SIGPIPE, current, 0);
1707 	err = -EPIPE;
1708 out_err:
1709 	scm_destroy(siocb->scm);
1710 	siocb->scm = NULL;
1711 	return sent ? : err;
1712 }
1713 
1714 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1715 				  struct msghdr *msg, size_t len)
1716 {
1717 	int err;
1718 	struct sock *sk = sock->sk;
1719 
1720 	err = sock_error(sk);
1721 	if (err)
1722 		return err;
1723 
1724 	if (sk->sk_state != TCP_ESTABLISHED)
1725 		return -ENOTCONN;
1726 
1727 	if (msg->msg_namelen)
1728 		msg->msg_namelen = 0;
1729 
1730 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1731 }
1732 
1733 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1734 			      struct msghdr *msg, size_t size,
1735 			      int flags)
1736 {
1737 	struct sock *sk = sock->sk;
1738 
1739 	if (sk->sk_state != TCP_ESTABLISHED)
1740 		return -ENOTCONN;
1741 
1742 	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1743 }
1744 
1745 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1746 {
1747 	struct unix_sock *u = unix_sk(sk);
1748 
1749 	msg->msg_namelen = 0;
1750 	if (u->addr) {
1751 		msg->msg_namelen = u->addr->len;
1752 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1753 	}
1754 }
1755 
1756 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1757 			      struct msghdr *msg, size_t size,
1758 			      int flags)
1759 {
1760 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1761 	struct scm_cookie tmp_scm;
1762 	struct sock *sk = sock->sk;
1763 	struct unix_sock *u = unix_sk(sk);
1764 	int noblock = flags & MSG_DONTWAIT;
1765 	struct sk_buff *skb;
1766 	int err;
1767 	int peeked, skip;
1768 
1769 	err = -EOPNOTSUPP;
1770 	if (flags&MSG_OOB)
1771 		goto out;
1772 
1773 	msg->msg_namelen = 0;
1774 
1775 	err = mutex_lock_interruptible(&u->readlock);
1776 	if (err) {
1777 		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1778 		goto out;
1779 	}
1780 
1781 	skip = sk_peek_offset(sk, flags);
1782 
1783 	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1784 	if (!skb) {
1785 		unix_state_lock(sk);
1786 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1787 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1788 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1789 			err = 0;
1790 		unix_state_unlock(sk);
1791 		goto out_unlock;
1792 	}
1793 
1794 	wake_up_interruptible_sync_poll(&u->peer_wait,
1795 					POLLOUT | POLLWRNORM | POLLWRBAND);
1796 
1797 	if (msg->msg_name)
1798 		unix_copy_addr(msg, skb->sk);
1799 
1800 	if (size > skb->len - skip)
1801 		size = skb->len - skip;
1802 	else if (size < skb->len - skip)
1803 		msg->msg_flags |= MSG_TRUNC;
1804 
1805 	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1806 	if (err)
1807 		goto out_free;
1808 
1809 	if (sock_flag(sk, SOCK_RCVTSTAMP))
1810 		__sock_recv_timestamp(msg, sk, skb);
1811 
1812 	if (!siocb->scm) {
1813 		siocb->scm = &tmp_scm;
1814 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1815 	}
1816 	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1817 	unix_set_secdata(siocb->scm, skb);
1818 
1819 	if (!(flags & MSG_PEEK)) {
1820 		if (UNIXCB(skb).fp)
1821 			unix_detach_fds(siocb->scm, skb);
1822 
1823 		sk_peek_offset_bwd(sk, skb->len);
1824 	} else {
1825 		/* It is questionable: on PEEK we could:
1826 		   - not return fds - good, but too simple 8)
1827 		   - return fds, and not return them on read (old strategy,
1828 		     apparently wrong)
1829 		   - clone fds (I chose this for now; it is the most universal
1830 		     solution)
1831 
1832 		   POSIX 1003.1g does not actually define this clearly
1833 		   at all. POSIX 1003.1g doesn't define a lot of things
1834 		   clearly however!
1835 
1836 		*/
1837 
1838 		sk_peek_offset_fwd(sk, size);
1839 
1840 		if (UNIXCB(skb).fp)
1841 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1842 	}
1843 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1844 
1845 	scm_recv(sock, msg, siocb->scm, flags);
1846 
1847 out_free:
1848 	skb_free_datagram(sk, skb);
1849 out_unlock:
1850 	mutex_unlock(&u->readlock);
1851 out:
1852 	return err;
1853 }
1854 
1855 /*
1856  *	Sleep until data has arrived. But check for races.
1857  */
1858 
1859 static long unix_stream_data_wait(struct sock *sk, long timeo)
1860 {
1861 	DEFINE_WAIT(wait);
1862 
1863 	unix_state_lock(sk);
1864 
1865 	for (;;) {
1866 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1867 
1868 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1869 		    sk->sk_err ||
1870 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1871 		    signal_pending(current) ||
1872 		    !timeo)
1873 			break;
1874 
1875 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1876 		unix_state_unlock(sk);
1877 		timeo = schedule_timeout(timeo);
1878 		unix_state_lock(sk);
1879 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1880 	}
1881 
1882 	finish_wait(sk_sleep(sk), &wait);
1883 	unix_state_unlock(sk);
1884 	return timeo;
1885 }
1886 
1887 
1888 
1889 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1890 			       struct msghdr *msg, size_t size,
1891 			       int flags)
1892 {
1893 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1894 	struct scm_cookie tmp_scm;
1895 	struct sock *sk = sock->sk;
1896 	struct unix_sock *u = unix_sk(sk);
1897 	struct sockaddr_un *sunaddr = msg->msg_name;
1898 	int copied = 0;
1899 	int check_creds = 0;
1900 	int target;
1901 	int err = 0;
1902 	long timeo;
1903 	int skip;
1904 
1905 	err = -EINVAL;
1906 	if (sk->sk_state != TCP_ESTABLISHED)
1907 		goto out;
1908 
1909 	err = -EOPNOTSUPP;
1910 	if (flags&MSG_OOB)
1911 		goto out;
1912 
1913 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1914 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1915 
1916 	msg->msg_namelen = 0;
1917 
1918 	/* Lock the socket to prevent queue disordering
1919 	 * while we sleep in memcpy_toiovec
1920 	 */
1921 
1922 	if (!siocb->scm) {
1923 		siocb->scm = &tmp_scm;
1924 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1925 	}
1926 
1927 	err = mutex_lock_interruptible(&u->readlock);
1928 	if (err) {
1929 		err = sock_intr_errno(timeo);
1930 		goto out;
1931 	}
1932 
1933 	skip = sk_peek_offset(sk, flags);
1934 
1935 	do {
1936 		int chunk;
1937 		struct sk_buff *skb;
1938 
1939 		unix_state_lock(sk);
1940 		skb = skb_peek(&sk->sk_receive_queue);
1941 again:
1942 		if (skb == NULL) {
1943 			unix_sk(sk)->recursion_level = 0;
1944 			if (copied >= target)
1945 				goto unlock;
1946 
1947 			/*
1948 			 *	POSIX 1003.1g mandates this order.
1949 			 */
1950 
1951 			err = sock_error(sk);
1952 			if (err)
1953 				goto unlock;
1954 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1955 				goto unlock;
1956 
1957 			unix_state_unlock(sk);
1958 			err = -EAGAIN;
1959 			if (!timeo)
1960 				break;
1961 			mutex_unlock(&u->readlock);
1962 
1963 			timeo = unix_stream_data_wait(sk, timeo);
1964 
1965 			if (signal_pending(current)
1966 			    ||  mutex_lock_interruptible(&u->readlock)) {
1967 				err = sock_intr_errno(timeo);
1968 				goto out;
1969 			}
1970 
1971 			continue;
1972  unlock:
1973 			unix_state_unlock(sk);
1974 			break;
1975 		}
1976 
1977 		if (skip >= skb->len) {
1978 			skip -= skb->len;
1979 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
1980 			goto again;
1981 		}
1982 
1983 		unix_state_unlock(sk);
1984 
1985 		if (check_creds) {
1986 			/* Never glue messages from different writers */
1987 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
1988 			    (UNIXCB(skb).cred != siocb->scm->cred))
1989 				break;
1990 		} else {
1991 			/* Copy credentials */
1992 			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1993 			check_creds = 1;
1994 		}
1995 
1996 		/* Copy address just once */
1997 		if (sunaddr) {
1998 			unix_copy_addr(msg, skb->sk);
1999 			sunaddr = NULL;
2000 		}
2001 
2002 		chunk = min_t(unsigned int, skb->len - skip, size);
2003 		if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2004 			if (copied == 0)
2005 				copied = -EFAULT;
2006 			break;
2007 		}
2008 		copied += chunk;
2009 		size -= chunk;
2010 
2011 		/* Mark read part of skb as used */
2012 		if (!(flags & MSG_PEEK)) {
2013 			skb_pull(skb, chunk);
2014 
2015 			sk_peek_offset_bwd(sk, chunk);
2016 
2017 			if (UNIXCB(skb).fp)
2018 				unix_detach_fds(siocb->scm, skb);
2019 
2020 			if (skb->len)
2021 				break;
2022 
2023 			skb_unlink(skb, &sk->sk_receive_queue);
2024 			consume_skb(skb);
2025 
2026 			if (siocb->scm->fp)
2027 				break;
2028 		} else {
2029 			/* Duplicating the fds on MSG_PEEK is questionable;
2030 			 * see the note in unix_dgram_recvmsg(). */
2031 			if (UNIXCB(skb).fp)
2032 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2033 
2034 			sk_peek_offset_fwd(sk, chunk);
2035 
2036 			break;
2037 		}
2038 	} while (size);
2039 
2040 	mutex_unlock(&u->readlock);
2041 	scm_recv(sock, msg, siocb->scm, flags);
2042 out:
2043 	return copied ? : err;
2044 }
2045 
2046 static int unix_shutdown(struct socket *sock, int mode)
2047 {
2048 	struct sock *sk = sock->sk;
2049 	struct sock *other;
2050 
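	/* Userspace passes SHUT_RD (0), SHUT_WR (1) or SHUT_RDWR (2);
	 * adding one maps these directly onto the RCV_SHUTDOWN and
	 * SEND_SHUTDOWN bits.
	 */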
2051 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2052 
2053 	if (!mode)
2054 		return 0;
2055 
2056 	unix_state_lock(sk);
2057 	sk->sk_shutdown |= mode;
2058 	other = unix_peer(sk);
2059 	if (other)
2060 		sock_hold(other);
2061 	unix_state_unlock(sk);
2062 	sk->sk_state_change(sk);
2063 
2064 	if (other &&
2065 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2066 
2067 		int peer_mode = 0;
2068 
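		/* Mirror the shutdown onto the peer: our receive side
		 * shutting down is the peer's send side, and vice versa.
		 */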
2069 		if (mode&RCV_SHUTDOWN)
2070 			peer_mode |= SEND_SHUTDOWN;
2071 		if (mode&SEND_SHUTDOWN)
2072 			peer_mode |= RCV_SHUTDOWN;
2073 		unix_state_lock(other);
2074 		other->sk_shutdown |= peer_mode;
2075 		unix_state_unlock(other);
2076 		other->sk_state_change(other);
2077 		if (peer_mode == SHUTDOWN_MASK)
2078 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2079 		else if (peer_mode & RCV_SHUTDOWN)
2080 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2081 	}
2082 	if (other)
2083 		sock_put(other);
2084 
2085 	return 0;
2086 }
2087 
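/*
 * Bytes available for reading: the sum of all queued skbs for stream
 * and seqpacket sockets, or just the head datagram's length for
 * datagram sockets (i.e. what a single recv() could return).
 */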
2088 long unix_inq_len(struct sock *sk)
2089 {
2090 	struct sk_buff *skb;
2091 	long amount = 0;
2092 
2093 	if (sk->sk_state == TCP_LISTEN)
2094 		return -EINVAL;
2095 
2096 	spin_lock(&sk->sk_receive_queue.lock);
2097 	if (sk->sk_type == SOCK_STREAM ||
2098 	    sk->sk_type == SOCK_SEQPACKET) {
2099 		skb_queue_walk(&sk->sk_receive_queue, skb)
2100 			amount += skb->len;
2101 	} else {
2102 		skb = skb_peek(&sk->sk_receive_queue);
2103 		if (skb)
2104 			amount = skb->len;
2105 	}
2106 	spin_unlock(&sk->sk_receive_queue.lock);
2107 
2108 	return amount;
2109 }
2110 EXPORT_SYMBOL_GPL(unix_inq_len);
2111 
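/*
 * Bytes sent but not yet consumed by the receiver; in-flight AF_UNIX
 * skbs stay charged to the sender's write allocation until read.
 */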
2112 long unix_outq_len(struct sock *sk)
2113 {
2114 	return sk_wmem_alloc_get(sk);
2115 }
2116 EXPORT_SYMBOL_GPL(unix_outq_len);
2117 
2118 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2119 {
2120 	struct sock *sk = sock->sk;
2121 	long amount = 0;
2122 	int err;
2123 
2124 	switch (cmd) {
2125 	case SIOCOUTQ:
2126 		amount = unix_outq_len(sk);
2127 		err = put_user(amount, (int __user *)arg);
2128 		break;
2129 	case SIOCINQ:
2130 		amount = unix_inq_len(sk);
2131 		if (amount < 0)
2132 			err = amount;
2133 		else
2134 			err = put_user(amount, (int __user *)arg);
2135 		break;
2136 	default:
2137 		err = -ENOIOCTLCMD;
2138 		break;
2139 	}
2140 	return err;
2141 }
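/*
 * Illustrative userspace sketch (not part of this file) of the two
 * ioctls handled above:
 *
 *	int queued;
 *	if (ioctl(fd, SIOCINQ, &queued) == 0)
 *		printf("%d bytes unread\n", queued);
 */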
2142 
2143 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2144 {
2145 	struct sock *sk = sock->sk;
2146 	unsigned int mask;
2147 
2148 	sock_poll_wait(file, sk_sleep(sk), wait);
2149 	mask = 0;
2150 
2151 	/* exceptional events? */
2152 	if (sk->sk_err)
2153 		mask |= POLLERR;
2154 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2155 		mask |= POLLHUP;
2156 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2157 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2158 
2159 	/* readable? */
2160 	if (!skb_queue_empty(&sk->sk_receive_queue))
2161 		mask |= POLLIN | POLLRDNORM;
2162 
2163 	/* Connection-based sockets need to check for termination and startup */
2164 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2165 	    sk->sk_state == TCP_CLOSE)
2166 		mask |= POLLHUP;
2167 
2168 	/*
2169 	 * We set writable also when the other side has shut down the
2170 	 * connection; this keeps pollers from waiting forever on a dead peer.
2171 	 */
2172 	if (unix_writable(sk))
2173 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2174 
2175 	return mask;
2176 }
2177 
2178 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2179 				    poll_table *wait)
2180 {
2181 	struct sock *sk = sock->sk, *other;
2182 	unsigned int mask, writable;
2183 
2184 	sock_poll_wait(file, sk_sleep(sk), wait);
2185 	mask = 0;
2186 
2187 	/* exceptional events? */
2188 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2189 		mask |= POLLERR;
2190 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2191 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2192 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2193 		mask |= POLLHUP;
2194 
2195 	/* readable? */
2196 	if (!skb_queue_empty(&sk->sk_receive_queue))
2197 		mask |= POLLIN | POLLRDNORM;
2198 
2199 	/* Connection-based sockets need to check for termination and startup */
2200 	if (sk->sk_type == SOCK_SEQPACKET) {
2201 		if (sk->sk_state == TCP_CLOSE)
2202 			mask |= POLLHUP;
2203 		/* connection hasn't started yet? */
2204 		if (sk->sk_state == TCP_SYN_SENT)
2205 			return mask;
2206 	}
2207 
2208 	/* No write status requested, avoid expensive OUT tests. */
2209 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2210 		return mask;
2211 
2212 	writable = unix_writable(sk);
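	/* If the peer is not connected back to us (n:1 datagram links),
	 * writability also depends on the peer's receive queue, so poll
	 * on its peer_wait queue as well.
	 */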
2213 	other = unix_peer_get(sk);
2214 	if (other) {
2215 		if (unix_peer(other) != sk) {
2216 			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2217 			if (unix_recvq_full(other))
2218 				writable = 0;
2219 		}
2220 		sock_put(other);
2221 	}
2222 
2223 	if (writable)
2224 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2225 	else
2226 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2227 
2228 	return mask;
2229 }
2230 
2231 #ifdef CONFIG_PROC_FS
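/* Walk every chain of the unix socket hash table in order; callers
 * serialize against table changes via unix_table_lock.
 */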
2232 static struct sock *first_unix_socket(int *i)
2233 {
2234 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2235 		if (!hlist_empty(&unix_socket_table[*i]))
2236 			return __sk_head(&unix_socket_table[*i]);
2237 	}
2238 	return NULL;
2239 }
2240 
2241 static struct sock *next_unix_socket(int *i, struct sock *s)
2242 {
2243 	struct sock *next = sk_next(s);
2244 	/* More in this chain? */
2245 	if (next)
2246 		return next;
2247 	/* Look for next non-empty chain. */
2248 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2249 		if (!hlist_empty(&unix_socket_table[*i]))
2250 			return __sk_head(&unix_socket_table[*i]);
2251 	}
2252 	return NULL;
2253 }
2254 
2255 struct unix_iter_state {
2256 	struct seq_net_private p;
2257 	int i;
2258 };
2259 
2260 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2261 {
2262 	struct unix_iter_state *iter = seq->private;
2263 	loff_t off = 0;
2264 	struct sock *s;
2265 
2266 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2267 		if (sock_net(s) != seq_file_net(seq))
2268 			continue;
2269 		if (off == pos)
2270 			return s;
2271 		++off;
2272 	}
2273 	return NULL;
2274 }
2275 
2276 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2277 	__acquires(unix_table_lock)
2278 {
2279 	spin_lock(&unix_table_lock);
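	/* seq position 0 is the header line (SEQ_START_TOKEN); socket N
	 * is found at position N + 1, hence the *pos - 1 below.
	 */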
2280 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2281 }
2282 
2283 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2284 {
2285 	struct unix_iter_state *iter = seq->private;
2286 	struct sock *sk = v;
2287 	++*pos;
2288 
2289 	if (v == SEQ_START_TOKEN)
2290 		sk = first_unix_socket(&iter->i);
2291 	else
2292 		sk = next_unix_socket(&iter->i, sk);
2293 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2294 		sk = next_unix_socket(&iter->i, sk);
2295 	return sk;
2296 }
2297 
2298 static void unix_seq_stop(struct seq_file *seq, void *v)
2299 	__releases(unix_table_lock)
2300 {
2301 	spin_unlock(&unix_table_lock);
2302 }
2303 
2304 static int unix_seq_show(struct seq_file *seq, void *v)
2305 {
2306 
2307 	if (v == SEQ_START_TOKEN)
2308 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2309 			 "Inode Path\n");
2310 	else {
2311 		struct sock *s = v;
2312 		struct unix_sock *u = unix_sk(s);
2313 		unix_state_lock(s);
2314 
2315 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2316 			s,
2317 			atomic_read(&s->sk_refcnt),
2318 			0,
2319 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2320 			s->sk_type,
2321 			s->sk_socket ?
2322 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2323 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2324 			sock_i_ino(s));
2325 
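		/* Emit the bound name: filesystem paths verbatim, abstract
		 * names (leading NUL byte) with a '@' prefix instead.
		 */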
2326 		if (u->addr) {
2327 			int i, len;
2328 			seq_putc(seq, ' ');
2329 
2330 			i = 0;
2331 			len = u->addr->len - sizeof(short);
2332 			if (!UNIX_ABSTRACT(s))
2333 				len--;
2334 			else {
2335 				seq_putc(seq, '@');
2336 				i++;
2337 			}
2338 			for ( ; i < len; i++)
2339 				seq_putc(seq, u->addr->name->sun_path[i]);
2340 		}
2341 		unix_state_unlock(s);
2342 		seq_putc(seq, '\n');
2343 	}
2344 
2345 	return 0;
2346 }
2347 
2348 static const struct seq_operations unix_seq_ops = {
2349 	.start  = unix_seq_start,
2350 	.next   = unix_seq_next,
2351 	.stop   = unix_seq_stop,
2352 	.show   = unix_seq_show,
2353 };
2354 
2355 static int unix_seq_open(struct inode *inode, struct file *file)
2356 {
2357 	return seq_open_net(inode, file, &unix_seq_ops,
2358 			    sizeof(struct unix_iter_state));
2359 }
2360 
2361 static const struct file_operations unix_seq_fops = {
2362 	.owner		= THIS_MODULE,
2363 	.open		= unix_seq_open,
2364 	.read		= seq_read,
2365 	.llseek		= seq_lseek,
2366 	.release	= seq_release_net,
2367 };
2368 
2369 #endif
2370 
2371 static const struct net_proto_family unix_family_ops = {
2372 	.family = PF_UNIX,
2373 	.create = unix_create,
2374 	.owner	= THIS_MODULE,
2375 };
2376 
2377 
2378 static int __net_init unix_net_init(struct net *net)
2379 {
2380 	int error = -ENOMEM;
2381 
2382 	net->unx.sysctl_max_dgram_qlen = 10;
2383 	if (unix_sysctl_register(net))
2384 		goto out;
2385 
2386 #ifdef CONFIG_PROC_FS
2387 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2388 		unix_sysctl_unregister(net);
2389 		goto out;
2390 	}
2391 #endif
2392 	error = 0;
2393 out:
2394 	return error;
2395 }
2396 
2397 static void __net_exit unix_net_exit(struct net *net)
2398 {
2399 	unix_sysctl_unregister(net);
2400 	proc_net_remove(net, "unix");
2401 }
2402 
2403 static struct pernet_operations unix_net_ops = {
2404 	.init = unix_net_init,
2405 	.exit = unix_net_exit,
2406 };
2407 
2408 static int __init af_unix_init(void)
2409 {
2410 	int rc = -1;
2411 	struct sk_buff *dummy_skb;
2412 
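	/* unix_skb_parms travels in skb->cb; refuse to build if it ever
	 * outgrows the control buffer.
	 */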
2413 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2414 
2415 	rc = proto_register(&unix_proto, 1);
2416 	if (rc != 0) {
2417 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2418 		       __func__);
2419 		goto out;
2420 	}
2421 
2422 	sock_register(&unix_family_ops);
2423 	register_pernet_subsys(&unix_net_ops);
2424 out:
2425 	return rc;
2426 }
2427 
2428 static void __exit af_unix_exit(void)
2429 {
2430 	sock_unregister(PF_UNIX);
2431 	proto_unregister(&unix_proto);
2432 	unregister_pernet_subsys(&unix_net_ops);
2433 }
2434 
2435 /* Earlier than device_initcall() so that other drivers invoking
2436    request_module() don't end up in a loop when modprobe tries
2437    to use a UNIX socket. But later than subsys_initcall() because
2438    we depend on infrastructure initialised there. */
2439 fs_initcall(af_unix_init);
2440 module_exit(af_unix_exit);
2441 
2442 MODULE_LICENSE("GPL");
2443 MODULE_ALIAS_NETPROTO(PF_UNIX);
2444